{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7fc99c3",
   "metadata": {},
   "source": [
    "# Use Implict Quantile Network to Play Pong\n",
    "\n",
    "TensorFlow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2b23c9f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import copy\n",
    "import logging\n",
    "import itertools\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "from gym.wrappers.atari_preprocessing import AtariPreprocessing\n",
    "from gym.wrappers.frame_stack import FrameStack\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow import losses\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import models\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "693bdf1e",
   "metadata": {},
   "source": [
    "Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6ab1cb62",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:00:52 [INFO] env: <AtariPreprocessing<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>\n",
      "00:00:52 [INFO] action_space: Discrete(6)\n",
      "00:00:52 [INFO] observation_space: : Box([[[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]], [[[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]], (4, 84, 84), uint8)\n",
      "00:00:52 [INFO] reward_range: (-inf, inf)\n",
      "00:00:52 [INFO] metadata: {'render.modes': ['human', 'rgb_array']}\n",
      "00:00:52 [INFO] num_stack: 4\n",
      "00:00:52 [INFO] lz4_compress: False\n",
      "00:00:52 [INFO] frames: deque([], maxlen=4)\n",
      "00:00:52 [INFO] id: PongNoFrameskip-v4\n",
      "00:00:52 [INFO] entry_point: gym.envs.atari:AtariEnv\n",
      "00:00:52 [INFO] reward_threshold: None\n",
      "00:00:52 [INFO] nondeterministic: False\n",
      "00:00:52 [INFO] max_episode_steps: 400000\n",
      "00:00:52 [INFO] _kwargs: {'game': 'pong', 'obs_type': 'image', 'frameskip': 1}\n",
      "00:00:52 [INFO] _env_name: PongNoFrameskip\n"
     ]
    }
   ],
   "source": [
    "env = FrameStack(AtariPreprocessing(gym.make('PongNoFrameskip-v4')),\n",
    "        num_stack=4)\n",
    "env.env.env.unwrapped.np_random.seed(0) # set seed for noops\n",
    "env.env.env.unwrapped.unwrapped.seed(0) # set seed for AtariEnv\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3747e919",
   "metadata": {},
   "source": [
    "Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5eb74be3",
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    def __init__(self, capacity):\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0\n",
    "        self.count = 0\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        self.memory.loc[self.i] = args\n",
    "        self.i = (self.i + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9e183fc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Net(keras.Model):\n",
    "    def __init__(self, action_n, sample_count, cosine_count):\n",
    "        super().__init__()\n",
    "        self.cosine_count = cosine_count\n",
    "        self.conv = keras.Sequential([\n",
    "                keras.layers.Permute((2, 3, 1), input_shape=(4, 84, 84)),\n",
    "                layers.Conv2D(32, kernel_size=8, strides=4, activation=nn.relu),\n",
    "                layers.Conv2D(64, kernel_size=4, strides=2, activation=nn.relu),\n",
    "                layers.Conv2D(64, kernel_size=3, strides=1, activation=nn.relu),\n",
    "                layers.Reshape((1, 3136))])\n",
    "        self.emb = keras.Sequential([\n",
    "                layers.Dense(3136, activation=nn.relu,\n",
    "                input_shape=(sample_count, cosine_count))])\n",
    "        self.fc = keras.Sequential([\n",
    "                layers.Dense(512, activation=nn.relu),\n",
    "                layers.Dense(action_n),\n",
    "                layers.Permute((2, 1))])\n",
    "\n",
    "    def call(self, input_tensor, cumprob_tensor):\n",
    "        logit_tensor = self.conv(input_tensor)\n",
    "        index_tensor = tf.range(1, self.cosine_count + 1, dtype=tf.float32)[\n",
    "                np.newaxis, np.newaxis, :]\n",
    "        cosine_tensor = tf.math.cos(index_tensor * np.pi * cumprob_tensor)\n",
    "        emb_tensor = self.emb(cosine_tensor)\n",
    "        prod_tensor = logit_tensor * emb_tensor\n",
    "        output_tensor = self.fc(prod_tensor)\n",
    "        return output_tensor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a2b9a656",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Agent:\n",
    "    def __init__(self, env):\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "        self.epsilon = 1.\n",
    "\n",
    "        self.replayer = DQNReplayer(capacity=100000)\n",
    "\n",
    "        self.sample_count = 8\n",
    "        self.evaluate_net = self.build_net(action_n=self.action_n,\n",
    "                sample_count=self.sample_count)\n",
    "        self.target_net = self.build_net(action_n=self.action_n,\n",
    "                sample_count=self.sample_count)\n",
    "        \n",
    "    def build_net(self, action_n, sample_count, cosine_count=64):\n",
    "        net = Net(action_n, sample_count, cosine_count)\n",
    "        loss = losses.Huber(reduction=\"none\")\n",
    "        optimizer = optimizers.Adam(0.0001)\n",
    "        net.compile(loss=loss, optimizer=optimizer)\n",
    "        return net\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        state_tensor = tf.convert_to_tensor(np.array(observation)[np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        prob_tensor = tf.random.uniform((1, self.sample_count, 1))\n",
    "        q_component_tensor = self.evaluate_net(state_tensor, prob_tensor)\n",
    "        q_tensor = tf.reduce_mean(q_component_tensor, axis=2)\n",
    "        action_tensor = tf.math.argmax(q_tensor, axis=1)\n",
    "        actions = action_tensor.numpy()\n",
    "        action = actions[0]\n",
    "        if self.mode == 'train':\n",
    "            if np.random.rand() < self.epsilon:\n",
    "                action = np.random.randint(0, self.action_n)\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= 1024 and self.replayer.count % 10 == 0:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.005):\n",
    "        average_weights = [(1. - learning_rate) * t + learning_rate * e for t, e\n",
    "                in zip(target_net.get_weights(), evaluate_net.get_weights())]\n",
    "        target_net.set_weights(average_weights)\n",
    "\n",
    "    def learn(self):\n",
    "        # replay\n",
    "        batch_size = 32\n",
    "        states, actions, rewards, next_states, dones = \\\n",
    "                self.replayer.sample(batch_size)\n",
    "        state_tensor = tf.convert_to_tensor(states, dtype=tf.float32)\n",
    "        reward_tensor = tf.convert_to_tensor(rewards[:, np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        done_tensor = tf.convert_to_tensor(dones[:, np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        next_state_tensor = tf.convert_to_tensor(next_states, dtype=tf.float32)\n",
    "\n",
    "        # calculate target\n",
    "        next_cumprob_tensor = tf.random.uniform((batch_size, self.sample_count, 1))\n",
    "        next_q_component_tensor = self.evaluate_net(next_state_tensor,\n",
    "                next_cumprob_tensor)\n",
    "        next_q_tensor = tf.reduce_mean(next_q_component_tensor, axis=2)\n",
    "        next_action_tensor = tf.math.argmax(next_q_tensor, axis=1)\n",
    "        next_actions = next_action_tensor.numpy()\n",
    "        next_cumprob_tensor = tf.random.uniform((batch_size, self.sample_count, 1))\n",
    "        all_next_q_quantile_tensor = self.target_net(next_state_tensor,\n",
    "                next_cumprob_tensor)\n",
    "        indices = [[idx, next_action] for idx, next_action in\n",
    "                enumerate(next_actions)]\n",
    "        next_q_quantile_tensor = tf.gather_nd(all_next_q_quantile_tensor,\n",
    "                indices)\n",
    "        target_quantile_tensor = reward_tensor + self.gamma \\\n",
    "                * next_q_quantile_tensor * (1. - done_tensor)\n",
    "\n",
    "        with tf.GradientTape() as tape:\n",
    "            cumprob_tensor = tf.random.uniform((batch_size,\n",
    "                    self.sample_count, 1))\n",
    "            all_q_quantile_tensor = self.evaluate_net(state_tensor,\n",
    "                    cumprob_tensor)\n",
    "            indices = [[idx, action] for idx, action in enumerate(actions)]\n",
    "            q_quantile_tensor = tf.gather_nd(all_q_quantile_tensor, indices)\n",
    "            target_quantile_tensor = target_quantile_tensor[:, np.newaxis, :]\n",
    "            q_quantile_tensor = q_quantile_tensor[:, :, np.newaxis]\n",
    "            td_error_tensor = target_quantile_tensor - q_quantile_tensor\n",
    "            abs_td_error_tensor = tf.math.abs(td_error_tensor)\n",
    "            hubor_delta = 1.\n",
    "            hubor_loss_tensor = tf.where(abs_td_error_tensor < hubor_delta,\n",
    "                    0.5 * tf.square(td_error_tensor),\n",
    "                    hubor_delta * (abs_td_error_tensor - 0.5 * hubor_delta))\n",
    "            comparison_tensor = tf.cast(td_error_tensor < 0, dtype=tf.float32)\n",
    "            quantile_regression_tensor = tf.math.abs(cumprob_tensor -\n",
    "                    comparison_tensor)\n",
    "            quantile_huber_loss_tensor = tf.reduce_mean(tf.reduce_sum(\n",
    "                    hubor_loss_tensor * quantile_regression_tensor, axis=-1),\n",
    "                    axis=1)\n",
    "            loss_tensor = tf.reduce_mean(quantile_huber_loss_tensor)\n",
    "        grads = tape.gradient(loss_tensor, self.evaluate_net.variables)\n",
    "        self.evaluate_net.optimizer.apply_gradients(\n",
    "                zip(grads, self.evaluate_net.variables))\n",
    "\n",
    "        self.update_net(self.target_net, self.evaluate_net)\n",
    "\n",
    "        self.epsilon = max(self.epsilon - 1e-5, 0.05)\n",
    "\n",
    "\n",
    "agent = Agent(env)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f02b76e5",
   "metadata": {},
   "source": [
    "Train & Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "23d77d15",
   "metadata": {},
   "outputs": [],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "34ae8868",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:00:53 [INFO] ==== train ====\n",
      "00:01:17 [DEBUG] train episode 0: reward = -18.00, steps = 1208\n",
      "00:01:51 [DEBUG] train episode 1: reward = -19.00, steps = 981\n",
      "00:02:24 [DEBUG] train episode 2: reward = -21.00, steps = 940\n",
      "00:02:53 [DEBUG] train episode 3: reward = -21.00, steps = 819\n",
      "00:03:30 [DEBUG] train episode 4: reward = -19.00, steps = 1037\n",
      "00:04:02 [DEBUG] train episode 5: reward = -21.00, steps = 875\n",
      "00:04:33 [DEBUG] train episode 6: reward = -21.00, steps = 851\n",
      "00:05:07 [DEBUG] train episode 7: reward = -21.00, steps = 939\n",
      "00:05:39 [DEBUG] train episode 8: reward = -20.00, steps = 875\n",
      "00:06:10 [DEBUG] train episode 9: reward = -21.00, steps = 853\n",
      "00:06:41 [DEBUG] train episode 10: reward = -21.00, steps = 852\n",
      "00:07:15 [DEBUG] train episode 11: reward = -21.00, steps = 940\n",
      "00:07:45 [DEBUG] train episode 12: reward = -21.00, steps = 848\n",
      "00:08:14 [DEBUG] train episode 13: reward = -21.00, steps = 806\n",
      "00:08:44 [DEBUG] train episode 14: reward = -20.00, steps = 836\n",
      "00:09:17 [DEBUG] train episode 15: reward = -21.00, steps = 939\n",
      "00:09:51 [DEBUG] train episode 16: reward = -19.00, steps = 931\n",
      "00:10:20 [DEBUG] train episode 17: reward = -21.00, steps = 791\n",
      "00:10:57 [DEBUG] train episode 18: reward = -20.00, steps = 1029\n",
      "00:11:29 [DEBUG] train episode 19: reward = -20.00, steps = 899\n",
      "00:12:06 [DEBUG] train episode 20: reward = -19.00, steps = 1022\n",
      "00:12:37 [DEBUG] train episode 21: reward = -20.00, steps = 862\n",
      "00:13:08 [DEBUG] train episode 22: reward = -21.00, steps = 838\n",
      "00:13:40 [DEBUG] train episode 23: reward = -21.00, steps = 869\n",
      "00:14:12 [DEBUG] train episode 24: reward = -20.00, steps = 891\n",
      "00:14:43 [DEBUG] train episode 25: reward = -20.00, steps = 837\n",
      "00:15:33 [DEBUG] train episode 26: reward = -18.00, steps = 1216\n",
      "00:16:05 [DEBUG] train episode 27: reward = -20.00, steps = 878\n",
      "00:16:44 [DEBUG] train episode 28: reward = -19.00, steps = 1036\n",
      "00:17:21 [DEBUG] train episode 29: reward = -19.00, steps = 996\n",
      "00:17:53 [DEBUG] train episode 30: reward = -21.00, steps = 851\n",
      "00:18:31 [DEBUG] train episode 31: reward = -20.00, steps = 1007\n",
      "00:19:05 [DEBUG] train episode 32: reward = -21.00, steps = 848\n",
      "00:19:39 [DEBUG] train episode 33: reward = -21.00, steps = 926\n",
      "00:20:12 [DEBUG] train episode 34: reward = -21.00, steps = 900\n",
      "00:20:44 [DEBUG] train episode 35: reward = -21.00, steps = 868\n",
      "00:21:30 [DEBUG] train episode 36: reward = -18.00, steps = 1177\n",
      "00:22:07 [DEBUG] train episode 37: reward = -20.00, steps = 1007\n",
      "00:22:46 [DEBUG] train episode 38: reward = -19.00, steps = 1002\n",
      "00:23:19 [DEBUG] train episode 39: reward = -21.00, steps = 884\n",
      "00:23:52 [DEBUG] train episode 40: reward = -21.00, steps = 880\n",
      "00:24:25 [DEBUG] train episode 41: reward = -20.00, steps = 895\n",
      "00:25:05 [DEBUG] train episode 42: reward = -19.00, steps = 1076\n",
      "00:25:41 [DEBUG] train episode 43: reward = -21.00, steps = 968\n",
      "00:26:10 [DEBUG] train episode 44: reward = -21.00, steps = 763\n",
      "00:26:40 [DEBUG] train episode 45: reward = -21.00, steps = 822\n",
      "00:27:16 [DEBUG] train episode 46: reward = -20.00, steps = 956\n",
      "00:27:52 [DEBUG] train episode 47: reward = -20.00, steps = 957\n",
      "00:28:25 [DEBUG] train episode 48: reward = -21.00, steps = 882\n",
      "00:28:58 [DEBUG] train episode 49: reward = -21.00, steps = 881\n",
      "00:29:35 [DEBUG] train episode 50: reward = -19.00, steps = 936\n",
      "00:30:11 [DEBUG] train episode 51: reward = -20.00, steps = 963\n",
      "00:30:45 [DEBUG] train episode 52: reward = -21.00, steps = 895\n",
      "00:31:23 [DEBUG] train episode 53: reward = -20.00, steps = 976\n",
      "00:31:58 [DEBUG] train episode 54: reward = -20.00, steps = 914\n",
      "00:32:34 [DEBUG] train episode 55: reward = -20.00, steps = 929\n",
      "00:33:09 [DEBUG] train episode 56: reward = -21.00, steps = 905\n",
      "00:33:40 [DEBUG] train episode 57: reward = -21.00, steps = 785\n",
      "00:34:18 [DEBUG] train episode 58: reward = -20.00, steps = 963\n",
      "00:34:54 [DEBUG] train episode 59: reward = -21.00, steps = 911\n",
      "00:35:39 [DEBUG] train episode 60: reward = -20.00, steps = 1123\n",
      "00:36:12 [DEBUG] train episode 61: reward = -20.00, steps = 839\n",
      "00:36:47 [DEBUG] train episode 62: reward = -21.00, steps = 843\n",
      "00:37:20 [DEBUG] train episode 63: reward = -21.00, steps = 822\n",
      "00:38:01 [DEBUG] train episode 64: reward = -19.00, steps = 982\n",
      "00:38:35 [DEBUG] train episode 65: reward = -21.00, steps = 830\n",
      "00:39:26 [DEBUG] train episode 66: reward = -19.00, steps = 1224\n",
      "00:40:03 [DEBUG] train episode 67: reward = -21.00, steps = 884\n",
      "00:40:36 [DEBUG] train episode 68: reward = -21.00, steps = 787\n",
      "00:41:13 [DEBUG] train episode 69: reward = -21.00, steps = 878\n",
      "00:41:55 [DEBUG] train episode 70: reward = -19.00, steps = 1002\n",
      "00:42:32 [DEBUG] train episode 71: reward = -21.00, steps = 848\n",
      "00:43:11 [DEBUG] train episode 72: reward = -20.00, steps = 915\n",
      "00:43:52 [DEBUG] train episode 73: reward = -21.00, steps = 841\n",
      "00:44:33 [DEBUG] train episode 74: reward = -21.00, steps = 824\n",
      "00:45:19 [DEBUG] train episode 75: reward = -21.00, steps = 911\n",
      "00:46:01 [DEBUG] train episode 76: reward = -21.00, steps = 817\n",
      "00:46:54 [DEBUG] train episode 77: reward = -19.00, steps = 1057\n",
      "00:47:47 [DEBUG] train episode 78: reward = -18.00, steps = 1042\n",
      "00:48:32 [DEBUG] train episode 79: reward = -21.00, steps = 867\n",
      "00:49:17 [DEBUG] train episode 80: reward = -21.00, steps = 868\n",
      "00:50:02 [DEBUG] train episode 81: reward = -20.00, steps = 894\n",
      "00:50:59 [DEBUG] train episode 82: reward = -21.00, steps = 1091\n",
      "00:51:49 [DEBUG] train episode 83: reward = -21.00, steps = 955\n",
      "00:52:32 [DEBUG] train episode 84: reward = -21.00, steps = 808\n",
      "00:53:27 [DEBUG] train episode 85: reward = -20.00, steps = 1041\n",
      "00:54:14 [DEBUG] train episode 86: reward = -21.00, steps = 881\n",
      "00:55:02 [DEBUG] train episode 87: reward = -20.00, steps = 900\n",
      "00:55:52 [DEBUG] train episode 88: reward = -20.00, steps = 923\n",
      "00:56:34 [DEBUG] train episode 89: reward = -21.00, steps = 788\n",
      "00:57:20 [DEBUG] train episode 90: reward = -21.00, steps = 851\n",
      "00:58:04 [DEBUG] train episode 91: reward = -21.00, steps = 820\n",
      "00:58:58 [DEBUG] train episode 92: reward = -20.00, steps = 985\n",
      "00:59:48 [DEBUG] train episode 93: reward = -21.00, steps = 911\n",
      "01:00:38 [DEBUG] train episode 94: reward = -21.00, steps = 925\n",
      "01:01:30 [DEBUG] train episode 95: reward = -21.00, steps = 932\n",
      "01:02:20 [DEBUG] train episode 96: reward = -20.00, steps = 897\n",
      "01:03:23 [DEBUG] train episode 97: reward = -19.00, steps = 1122\n",
      "01:04:11 [DEBUG] train episode 98: reward = -21.00, steps = 817\n",
      "01:05:00 [DEBUG] train episode 99: reward = -21.00, steps = 825\n",
      "01:06:01 [DEBUG] train episode 100: reward = -20.00, steps = 1022\n",
      "01:06:55 [DEBUG] train episode 101: reward = -21.00, steps = 913\n",
      "01:07:48 [DEBUG] train episode 102: reward = -20.00, steps = 884\n",
      "01:08:46 [DEBUG] train episode 103: reward = -19.00, steps = 960\n",
      "01:09:49 [DEBUG] train episode 104: reward = -20.00, steps = 1039\n",
      "01:10:40 [DEBUG] train episode 105: reward = -21.00, steps = 852\n",
      "01:11:33 [DEBUG] train episode 106: reward = -21.00, steps = 866\n",
      "01:12:40 [DEBUG] train episode 107: reward = -21.00, steps = 1094\n",
      "01:15:46 [DEBUG] train episode 108: reward = -21.00, steps = 791\n",
      "01:22:24 [DEBUG] train episode 109: reward = -20.00, steps = 999\n",
      "01:27:41 [DEBUG] train episode 110: reward = -21.00, steps = 789\n",
      "01:35:42 [DEBUG] train episode 111: reward = -20.00, steps = 1164\n",
      "01:42:33 [DEBUG] train episode 112: reward = -20.00, steps = 975\n",
      "01:50:00 [DEBUG] train episode 113: reward = -19.00, steps = 1038\n",
      "01:56:35 [DEBUG] train episode 114: reward = -21.00, steps = 910\n",
      "02:02:34 [DEBUG] train episode 115: reward = -21.00, steps = 818\n",
      "02:09:58 [DEBUG] train episode 116: reward = -21.00, steps = 1036\n",
      "02:17:31 [DEBUG] train episode 117: reward = -20.00, steps = 1064\n",
      "02:23:28 [DEBUG] train episode 118: reward = -21.00, steps = 841\n",
      "02:30:00 [DEBUG] train episode 119: reward = -21.00, steps = 929\n",
      "02:37:19 [DEBUG] train episode 120: reward = -20.00, steps = 1026\n",
      "02:45:27 [DEBUG] train episode 121: reward = -19.00, steps = 1130\n",
      "02:54:23 [DEBUG] train episode 122: reward = -19.00, steps = 1232\n",
      "03:01:08 [DEBUG] train episode 123: reward = -21.00, steps = 917\n",
      "03:09:07 [DEBUG] train episode 124: reward = -20.00, steps = 1082\n",
      "03:16:12 [DEBUG] train episode 125: reward = -20.00, steps = 956\n",
      "03:24:09 [DEBUG] train episode 126: reward = -19.00, steps = 1067\n",
      "03:31:37 [DEBUG] train episode 127: reward = -21.00, steps = 991\n",
      "03:40:52 [DEBUG] train episode 128: reward = -17.00, steps = 1217\n",
      "03:49:54 [DEBUG] train episode 129: reward = -19.00, steps = 1182\n",
      "04:00:15 [DEBUG] train episode 130: reward = -20.00, steps = 1351\n",
      "04:10:54 [DEBUG] train episode 131: reward = -19.00, steps = 1376\n",
      "04:18:58 [DEBUG] train episode 132: reward = -21.00, steps = 1033\n",
      "04:29:32 [DEBUG] train episode 133: reward = -20.00, steps = 1346\n",
      "04:40:01 [DEBUG] train episode 134: reward = -20.00, steps = 1326\n",
      "04:50:47 [DEBUG] train episode 135: reward = -18.00, steps = 1346\n",
      "05:01:32 [DEBUG] train episode 136: reward = -18.00, steps = 1319\n",
      "05:10:00 [DEBUG] train episode 137: reward = -20.00, steps = 1050\n",
      "05:20:00 [DEBUG] train episode 138: reward = -21.00, steps = 1234\n",
      "05:33:39 [DEBUG] train episode 139: reward = -17.00, steps = 1664\n",
      "05:46:50 [DEBUG] train episode 140: reward = -15.00, steps = 1591\n",
      "05:59:29 [DEBUG] train episode 141: reward = -15.00, steps = 1513\n",
      "06:09:02 [DEBUG] train episode 142: reward = -20.00, steps = 1131\n",
      "06:21:46 [DEBUG] train episode 143: reward = -19.00, steps = 1494\n",
      "06:31:41 [DEBUG] train episode 144: reward = -20.00, steps = 1170\n",
      "06:45:19 [DEBUG] train episode 145: reward = -19.00, steps = 1593\n",
      "06:58:50 [DEBUG] train episode 146: reward = -16.00, steps = 1558\n",
      "07:15:10 [DEBUG] train episode 147: reward = -15.00, steps = 1864\n",
      "07:27:11 [DEBUG] train episode 148: reward = -17.00, steps = 1353\n",
      "07:43:16 [DEBUG] train episode 149: reward = -16.00, steps = 1791\n",
      "08:00:18 [DEBUG] train episode 150: reward = -17.00, steps = 1872\n",
      "08:13:27 [DEBUG] train episode 151: reward = -20.00, steps = 1430\n",
      "08:28:15 [DEBUG] train episode 152: reward = -17.00, steps = 1591\n",
      "08:41:53 [DEBUG] train episode 153: reward = -18.00, steps = 1453\n",
      "08:56:28 [DEBUG] train episode 154: reward = -20.00, steps = 1533\n",
      "09:11:17 [DEBUG] train episode 155: reward = -20.00, steps = 1546\n",
      "09:25:55 [DEBUG] train episode 156: reward = -18.00, steps = 1514\n",
      "09:42:44 [DEBUG] train episode 157: reward = -18.00, steps = 1619\n",
      "10:00:54 [DEBUG] train episode 158: reward = -16.00, steps = 1828\n",
      "10:20:00 [DEBUG] train episode 159: reward = -16.00, steps = 1890\n",
      "10:39:15 [DEBUG] train episode 160: reward = -16.00, steps = 1831\n",
      "10:59:03 [DEBUG] train episode 161: reward = -19.00, steps = 1765\n",
      "11:24:36 [DEBUG] train episode 162: reward = -15.00, steps = 2275\n",
      "11:44:55 [DEBUG] train episode 163: reward = -14.00, steps = 1774\n",
      "12:09:16 [DEBUG] train episode 164: reward = -12.00, steps = 2113\n",
      "12:32:19 [DEBUG] train episode 165: reward = -14.00, steps = 2024\n",
      "12:56:01 [DEBUG] train episode 166: reward = -17.00, steps = 2057\n",
      "13:20:53 [DEBUG] train episode 167: reward = -13.00, steps = 2128\n",
      "13:48:35 [DEBUG] train episode 168: reward = -9.00, steps = 2337\n",
      "14:05:50 [DEBUG] train episode 169: reward = -19.00, steps = 1436\n",
      "14:32:56 [DEBUG] train episode 170: reward = -13.00, steps = 2220\n",
      "15:02:40 [DEBUG] train episode 171: reward = -14.00, steps = 2309\n",
      "15:38:23 [DEBUG] train episode 172: reward = -4.00, steps = 2789\n",
      "16:08:33 [DEBUG] train episode 173: reward = -10.00, steps = 2316\n",
      "16:42:58 [DEBUG] train episode 174: reward = -7.00, steps = 2641\n",
      "17:22:27 [DEBUG] train episode 175: reward = -5.00, steps = 2895\n",
      "17:57:45 [DEBUG] train episode 176: reward = -13.00, steps = 2493\n",
      "18:41:00 [DEBUG] train episode 177: reward = -4.00, steps = 3102\n",
      "19:26:31 [DEBUG] train episode 178: reward = -4.00, steps = 3180\n",
      "19:53:44 [DEBUG] train episode 179: reward = -13.00, steps = 1860\n",
      "20:09:31 [DEBUG] train episode 180: reward = -19.00, steps = 1074\n",
      "20:47:31 [DEBUG] train episode 181: reward = -7.00, steps = 2388\n",
      "21:29:32 [DEBUG] train episode 182: reward = -3.00, steps = 2657\n",
      "22:12:37 [DEBUG] train episode 183: reward = -3.00, steps = 2618\n",
      "23:05:50 [DEBUG] train episode 184: reward = -2.00, steps = 2873\n",
      "23:46:56 [DEBUG] train episode 185: reward = -7.00, steps = 2349\n",
      "00:22:10 [DEBUG] train episode 186: reward = -16.00, steps = 1689\n",
      "00:42:53 [DEBUG] train episode 187: reward = -20.00, steps = 836\n",
      "01:13:55 [DEBUG] train episode 188: reward = -12.00, steps = 1716\n",
      "01:33:40 [DEBUG] train episode 189: reward = -19.00, steps = 1102\n",
      "02:30:10 [DEBUG] train episode 190: reward = -2.00, steps = 3123\n",
      "03:18:06 [DEBUG] train episode 191: reward = -4.00, steps = 2569\n",
      "03:54:56 [DEBUG] train episode 192: reward = -9.00, steps = 1949\n",
      "04:22:56 [DEBUG] train episode 193: reward = -16.00, steps = 1469\n",
      "04:45:26 [DEBUG] train episode 194: reward = -18.00, steps = 1153\n",
      "05:42:10 [DEBUG] train episode 195: reward = -2.00, steps = 2807\n",
      "06:13:27 [DEBUG] train episode 196: reward = -15.00, steps = 1446\n",
      "07:08:39 [DEBUG] train episode 197: reward = -7.00, steps = 2641\n",
      "07:59:03 [DEBUG] train episode 198: reward = -8.00, steps = 2357\n",
      "08:38:35 [DEBUG] train episode 199: reward = -13.00, steps = 1801\n",
      "09:30:34 [DEBUG] train episode 200: reward = -7.00, steps = 2415\n",
      "10:33:12 [DEBUG] train episode 201: reward = 3.00, steps = 2829\n",
      "11:25:00 [DEBUG] train episode 202: reward = -7.00, steps = 2322\n",
      "12:25:23 [DEBUG] train episode 203: reward = -2.00, steps = 2653\n",
      "13:34:15 [DEBUG] train episode 204: reward = -1.00, steps = 3036\n",
      "14:40:30 [DEBUG] train episode 205: reward = -2.00, steps = 2790\n",
      "15:49:57 [DEBUG] train episode 206: reward = -3.00, steps = 2863\n",
      "16:56:35 [DEBUG] train episode 207: reward = -5.00, steps = 2679\n",
      "18:08:09 [DEBUG] train episode 208: reward = -2.00, steps = 2978\n",
      "18:45:50 [DEBUG] train episode 209: reward = -17.00, steps = 1557\n",
      "19:20:15 [DEBUG] train episode 210: reward = -16.00, steps = 1410\n",
      "20:10:49 [DEBUG] train episode 211: reward = -13.00, steps = 1788\n",
      "20:59:06 [DEBUG] train episode 212: reward = -11.00, steps = 1752\n",
      "22:03:31 [DEBUG] train episode 213: reward = -7.00, steps = 2242\n",
      "23:18:04 [DEBUG] train episode 214: reward = -8.00, steps = 2478\n",
      "23:52:46 [DEBUG] train episode 215: reward = -17.00, steps = 1200\n",
      "00:45:51 [DEBUG] train episode 216: reward = -12.00, steps = 1758\n",
      "01:43:28 [DEBUG] train episode 217: reward = -9.00, steps = 2070\n",
      "02:25:20 [DEBUG] train episode 218: reward = -15.00, steps = 1622\n",
      "03:06:23 [DEBUG] train episode 219: reward = -14.00, steps = 1559\n",
      "03:53:05 [DEBUG] train episode 220: reward = -13.00, steps = 1753\n",
      "05:01:47 [DEBUG] train episode 221: reward = -3.00, steps = 2642\n",
      "05:58:25 [DEBUG] train episode 222: reward = -11.00, steps = 2106\n",
      "06:33:23 [DEBUG] train episode 223: reward = -17.00, steps = 1293\n",
      "07:47:11 [DEBUG] train episode 224: reward = -5.00, steps = 2418\n",
      "09:06:21 [DEBUG] train episode 225: reward = -2.00, steps = 2706\n",
      "10:22:29 [DEBUG] train episode 226: reward = 5.00, steps = 2614\n",
      "11:38:14 [DEBUG] train episode 227: reward = -3.00, steps = 2601\n",
      "12:59:05 [DEBUG] train episode 228: reward = 3.00, steps = 2684\n",
      "14:20:42 [DEBUG] train episode 229: reward = -3.00, steps = 2568\n",
      "15:25:50 [DEBUG] train episode 230: reward = -10.00, steps = 2027\n",
      "17:05:03 [DEBUG] train episode 231: reward = 3.00, steps = 3062\n",
      "18:32:02 [DEBUG] train episode 232: reward = -2.00, steps = 2576\n",
      "19:48:16 [DEBUG] train episode 233: reward = -6.00, steps = 2220\n",
      "21:18:08 [DEBUG] train episode 234: reward = -1.00, steps = 2638\n",
      "23:10:13 [DEBUG] train episode 235: reward = -2.00, steps = 2895\n",
      "00:44:41 [DEBUG] train episode 236: reward = -8.00, steps = 2386\n",
      "01:58:39 [DEBUG] train episode 237: reward = -10.00, steps = 1939\n",
      "03:36:58 [DEBUG] train episode 238: reward = -2.00, steps = 2569\n",
      "05:09:47 [DEBUG] train episode 239: reward = -5.00, steps = 2561\n",
      "06:38:30 [DEBUG] train episode 240: reward = -4.00, steps = 2516\n",
      "08:32:04 [DEBUG] train episode 241: reward = -2.00, steps = 3077\n",
      "10:37:35 [DEBUG] train episode 242: reward = 4.00, steps = 2885\n",
      "12:35:12 [DEBUG] train episode 243: reward = -4.00, steps = 2656\n",
      "14:48:24 [DEBUG] train episode 244: reward = -2.00, steps = 2910\n",
      "16:36:19 [DEBUG] train episode 245: reward = -7.00, steps = 2345\n",
      "18:19:44 [DEBUG] train episode 246: reward = 13.00, steps = 2188\n",
      "20:27:47 [DEBUG] train episode 247: reward = -2.00, steps = 2690\n",
      "22:44:10 [DEBUG] train episode 248: reward = 4.00, steps = 2824\n",
      "00:38:19 [DEBUG] train episode 249: reward = 15.00, steps = 2061\n",
      "02:54:55 [DEBUG] train episode 250: reward = 1.00, steps = 2639\n",
      "04:20:25 [DEBUG] train episode 251: reward = 20.00, steps = 1656\n",
      "05:57:03 [DEBUG] train episode 252: reward = 16.00, steps = 1883\n",
      "07:33:20 [DEBUG] train episode 253: reward = 17.00, steps = 1849\n",
      "09:10:45 [DEBUG] train episode 254: reward = 17.00, steps = 1813\n",
      "11:33:50 [DEBUG] train episode 255: reward = 8.00, steps = 2523\n",
      "13:51:38 [DEBUG] train episode 256: reward = 1.00, steps = 2391\n",
      "16:26:04 [DEBUG] train episode 257: reward = 1.00, steps = 2633\n",
      "18:15:10 [DEBUG] train episode 258: reward = 18.00, steps = 1848\n",
      "20:21:14 [DEBUG] train episode 259: reward = 13.00, steps = 2082\n",
      "22:27:27 [DEBUG] train episode 260: reward = 16.00, steps = 1960\n",
      "00:36:49 [DEBUG] train episode 261: reward = 16.00, steps = 2123\n",
      "03:14:42 [DEBUG] train episode 262: reward = 3.00, steps = 2753\n",
      "05:31:44 [DEBUG] train episode 263: reward = 6.00, steps = 2375\n",
      "07:54:26 [DEBUG] train episode 264: reward = 8.00, steps = 2433\n",
      "10:30:39 [DEBUG] train episode 265: reward = 2.00, steps = 2538\n",
      "12:42:53 [DEBUG] train episode 266: reward = 14.00, steps = 2090\n",
      "14:46:01 [DEBUG] train episode 267: reward = 16.00, steps = 1920\n",
      "16:43:33 [DEBUG] train episode 268: reward = 17.00, steps = 1938\n",
      "18:28:54 [DEBUG] train episode 269: reward = 17.00, steps = 1916\n",
      "20:24:59 [DEBUG] train episode 270: reward = 15.00, steps = 2072\n",
      "22:13:45 [DEBUG] train episode 271: reward = 15.00, steps = 1894\n",
      "00:03:41 [DEBUG] train episode 272: reward = 17.00, steps = 1874\n",
      "00:03:45 [INFO] ==== test ====\n",
      "00:06:32 [DEBUG] test episode 0: reward = 20.00, steps = 1670\n",
      "00:09:18 [DEBUG] test episode 1: reward = 20.00, steps = 1667\n",
      "00:12:00 [DEBUG] test episode 2: reward = 20.00, steps = 1663\n",
      "00:14:43 [DEBUG] test episode 3: reward = 20.00, steps = 1663\n",
      "00:17:58 [DEBUG] test episode 4: reward = 19.00, steps = 1701\n",
      "00:20:44 [DEBUG] test episode 5: reward = 20.00, steps = 1748\n",
      "00:23:22 [DEBUG] test episode 6: reward = 20.00, steps = 1664\n",
      "00:25:59 [DEBUG] test episode 7: reward = 20.00, steps = 1662\n",
      "00:28:39 [DEBUG] test episode 8: reward = 19.00, steps = 1720\n",
      "00:31:13 [DEBUG] test episode 9: reward = 20.00, steps = 1665\n",
      "00:34:01 [DEBUG] test episode 10: reward = 18.00, steps = 1792\n",
      "00:36:35 [DEBUG] test episode 11: reward = 20.00, steps = 1662\n",
      "00:39:23 [DEBUG] test episode 12: reward = 18.00, steps = 1783\n",
      "00:41:57 [DEBUG] test episode 13: reward = 20.00, steps = 1665\n",
      "00:44:33 [DEBUG] test episode 14: reward = 20.00, steps = 1670\n",
      "00:47:18 [DEBUG] test episode 15: reward = 19.00, steps = 1758\n",
      "00:50:51 [DEBUG] test episode 16: reward = 14.00, steps = 2264\n",
      "00:53:29 [DEBUG] test episode 17: reward = 20.00, steps = 1671\n",
      "00:56:07 [DEBUG] test episode 18: reward = 20.00, steps = 1668\n",
      "00:58:44 [DEBUG] test episode 19: reward = 20.00, steps = 1671\n",
      "01:01:30 [DEBUG] test episode 20: reward = 19.00, steps = 1760\n",
      "01:04:08 [DEBUG] test episode 21: reward = 20.00, steps = 1667\n",
      "01:07:11 [DEBUG] test episode 22: reward = 14.00, steps = 1965\n",
      "01:09:48 [DEBUG] test episode 23: reward = 20.00, steps = 1665\n",
      "01:12:23 [DEBUG] test episode 24: reward = 20.00, steps = 1669\n",
      "01:14:58 [DEBUG] test episode 25: reward = 20.00, steps = 1663\n",
      "01:17:45 [DEBUG] test episode 26: reward = 18.00, steps = 1783\n",
      "01:20:23 [DEBUG] test episode 27: reward = 20.00, steps = 1669\n",
      "01:22:59 [DEBUG] test episode 28: reward = 20.00, steps = 1662\n",
      "01:25:36 [DEBUG] test episode 29: reward = 20.00, steps = 1668\n",
      "01:28:13 [DEBUG] test episode 30: reward = 20.00, steps = 1666\n",
      "01:30:55 [DEBUG] test episode 31: reward = 19.00, steps = 1734\n",
      "01:33:31 [DEBUG] test episode 32: reward = 20.00, steps = 1666\n",
      "01:36:53 [DEBUG] test episode 33: reward = 8.00, steps = 2149\n",
      "01:39:29 [DEBUG] test episode 34: reward = 20.00, steps = 1666\n",
      "01:42:06 [DEBUG] test episode 35: reward = 20.00, steps = 1667\n",
      "01:44:48 [DEBUG] test episode 36: reward = 19.00, steps = 1725\n",
      "01:47:25 [DEBUG] test episode 37: reward = 20.00, steps = 1664\n",
      "01:50:19 [DEBUG] test episode 38: reward = 16.00, steps = 1848\n",
      "01:52:56 [DEBUG] test episode 39: reward = 20.00, steps = 1665\n",
      "01:55:42 [DEBUG] test episode 40: reward = 19.00, steps = 1760\n",
      "01:58:21 [DEBUG] test episode 41: reward = 20.00, steps = 1665\n",
      "02:00:59 [DEBUG] test episode 42: reward = 20.00, steps = 1668\n",
      "02:03:36 [DEBUG] test episode 43: reward = 20.00, steps = 1666\n",
      "02:06:20 [DEBUG] test episode 44: reward = 18.00, steps = 1736\n",
      "02:08:57 [DEBUG] test episode 45: reward = 20.00, steps = 1667\n",
      "02:11:43 [DEBUG] test episode 46: reward = 19.00, steps = 1759\n",
      "02:14:28 [DEBUG] test episode 47: reward = 19.00, steps = 1741\n",
      "02:17:05 [DEBUG] test episode 48: reward = 20.00, steps = 1664\n",
      "02:19:49 [DEBUG] test episode 49: reward = 19.00, steps = 1746\n",
      "02:22:58 [DEBUG] test episode 50: reward = 14.00, steps = 1991\n",
      "02:25:35 [DEBUG] test episode 51: reward = 20.00, steps = 1665\n",
      "02:28:19 [DEBUG] test episode 52: reward = 19.00, steps = 1743\n",
      "02:30:56 [DEBUG] test episode 53: reward = 20.00, steps = 1666\n",
      "02:33:43 [DEBUG] test episode 54: reward = 19.00, steps = 1766\n",
      "02:36:19 [DEBUG] test episode 55: reward = 20.00, steps = 1666\n",
      "02:38:55 [DEBUG] test episode 56: reward = 20.00, steps = 1660\n",
      "02:41:31 [DEBUG] test episode 57: reward = 20.00, steps = 1667\n",
      "02:44:07 [DEBUG] test episode 58: reward = 20.00, steps = 1661\n",
      "02:46:53 [DEBUG] test episode 59: reward = 19.00, steps = 1757\n",
      "02:49:30 [DEBUG] test episode 60: reward = 20.00, steps = 1671\n",
      "02:52:13 [DEBUG] test episode 61: reward = 19.00, steps = 1725\n",
      "02:54:50 [DEBUG] test episode 62: reward = 20.00, steps = 1667\n",
      "02:57:27 [DEBUG] test episode 63: reward = 20.00, steps = 1662\n",
      "03:00:04 [DEBUG] test episode 64: reward = 20.00, steps = 1669\n",
      "03:03:14 [DEBUG] test episode 65: reward = 18.00, steps = 2025\n",
      "03:05:56 [DEBUG] test episode 66: reward = 19.00, steps = 1727\n",
      "03:08:43 [DEBUG] test episode 67: reward = 18.00, steps = 1777\n",
      "03:11:19 [DEBUG] test episode 68: reward = 20.00, steps = 1660\n",
      "03:13:57 [DEBUG] test episode 69: reward = 20.00, steps = 1660\n",
      "03:16:33 [DEBUG] test episode 70: reward = 20.00, steps = 1669\n",
      "03:19:09 [DEBUG] test episode 71: reward = 20.00, steps = 1666\n",
      "03:21:47 [DEBUG] test episode 72: reward = 20.00, steps = 1671\n",
      "03:24:34 [DEBUG] test episode 73: reward = 19.00, steps = 1781\n",
      "03:27:15 [DEBUG] test episode 74: reward = 18.00, steps = 1731\n",
      "03:29:59 [DEBUG] test episode 75: reward = 19.00, steps = 1744\n",
      "03:32:45 [DEBUG] test episode 76: reward = 19.00, steps = 1756\n",
      "03:35:21 [DEBUG] test episode 77: reward = 20.00, steps = 1660\n",
      "03:37:58 [DEBUG] test episode 78: reward = 20.00, steps = 1666\n",
      "03:40:33 [DEBUG] test episode 79: reward = 20.00, steps = 1665\n",
      "03:43:09 [DEBUG] test episode 80: reward = 20.00, steps = 1669\n",
      "03:46:14 [DEBUG] test episode 81: reward = 18.00, steps = 1993\n",
      "03:48:48 [DEBUG] test episode 82: reward = 20.00, steps = 1666\n",
      "03:51:24 [DEBUG] test episode 83: reward = 20.00, steps = 1670\n",
      "03:54:18 [DEBUG] test episode 84: reward = 18.00, steps = 1853\n",
      "03:57:00 [DEBUG] test episode 85: reward = 20.00, steps = 1726\n",
      "03:59:48 [DEBUG] test episode 86: reward = 18.00, steps = 1781\n",
      "04:02:24 [DEBUG] test episode 87: reward = 20.00, steps = 1671\n",
      "04:05:07 [DEBUG] test episode 88: reward = 20.00, steps = 1728\n",
      "04:07:51 [DEBUG] test episode 89: reward = 19.00, steps = 1730\n",
      "04:10:27 [DEBUG] test episode 90: reward = 20.00, steps = 1662\n",
      "04:13:04 [DEBUG] test episode 91: reward = 20.00, steps = 1666\n",
      "04:15:41 [DEBUG] test episode 92: reward = 20.00, steps = 1671\n",
      "04:18:24 [DEBUG] test episode 93: reward = 19.00, steps = 1758\n",
      "04:21:08 [DEBUG] test episode 94: reward = 19.00, steps = 1719\n",
      "04:23:45 [DEBUG] test episode 95: reward = 20.00, steps = 1669\n",
      "04:26:22 [DEBUG] test episode 96: reward = 20.00, steps = 1669\n",
      "04:28:59 [DEBUG] test episode 97: reward = 20.00, steps = 1666\n",
      "04:31:35 [DEBUG] test episode 98: reward = 20.00, steps = 1661\n",
      "04:34:21 [DEBUG] test episode 99: reward = 19.00, steps = 1766\n",
      "04:34:21 [INFO] average episode reward = 19.24 ± 1.65\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAABHP0lEQVR4nO29eXydV33u+6x32KOGLXmU5EF24iRkji2HBJIQCilQIAF64IZbSm6hDS1DKXDaQw+9lHN7c8o99NBCoUBoOaSUBEKZwnAImcjQTJbtOLHjOHZsybZky5KlrWlP77DuH++71jtuacuSvKXt3/fz8Udbe3iHva3nffazfuu3GOccBEEQRGOi1PsACIIgiMWDRJ4gCKKBIZEnCIJoYEjkCYIgGhgSeYIgiAZGq/cB+Fm5ciXv7u6u92EQBEEsK3bu3DnCOV8V99iSEvnu7m709vbW+zAIgiCWFYyx/mqPUVxDEATRwJDIEwRBNDAk8gRBEA0MiTxBEEQDQyJPEATRwMxb5Blj6xljjzDG9jPG9jHGPu7e384Ye4AxdtD92Tb/wyUIgiDmwkI4eRPApzjnrwJwDYCPMMYuBvBpAA9xzrcAeMj9nSAIgjiLzFvkOecnOOe73NuTAPYD6AJwC4C73KfdBeAd890XQRBEPShWLPxw53Esx9bsC5rJM8a6AVwF4BkAazjnJwDnQgBgdZXX3M4Y62WM9Q4PDy/k4RAEQSwI/+P+l/CpH+zB4wdH6n0oc2bBRJ4x1gTghwD+jHM+UevrOOd3cs57OOc9q1bFzsolCIKoK+NFAwAwPFmu85HMnQURecaYDkfgv8s5/5F79xBjrMN9vAPAqYXYF0EQxNkmm3A6wBQqZp2PZO4sRHUNA/AvAPZzzr/oe+g+ALe5t28D8NP57osgCKIeZJIqAGC6YtX5SObOQjQoey2A3wfwAmPsOfe+/wrg8wDuZYx9EMBRAO9egH0RBEGcdTK66+TLy8/Jz1vkOedPAGBVHn7DfLdPEARRbzKJ5evkacYrQRDELCQ0RyrPyUyeIAii0bFspz6+QE6eIAii8bDdSVDTZRJ5giCIhkM4+aIRH9fcv+8k3nvn02fzkGpmSS3/RxAEsRSxZnHyH/rOTgBAxbRlfr9UWFpHQxAEsQSxZSYf7+SFsBeXYGZPIk8QBDELlu38rObkk67ITy/B6hsSeYIgiFmwbEflp8omDMtGyQiKfUp36uhrLbEsmxbKpreN8aIhvy0sNCTyBEEQsyAy+fGiga1/8wAu+r9/he89e1Q+Lp18DdU3O/pGcfFn78cln70fzx/PAwDe8/Wn8OHv7lr4AweJPEEQxKyIuAYAJkuOWz9yelre5zn52UX+4ZdOwbI5TJtj36DTsHcwX8Ta1tQCHrEHiTxBEMQs2KHFQhKagorpKX9yDjNid/aN4bKuVigMOJEvYqJkYLJsojNHIk8QBFEXLF9ensvoaE5qMKyoyM/W26ZsWnjueB6v3tSOtS0pDORLOJEvAQA6c+lFOHISeYIgiFnxi/yFa5qhq2En7zYwK5vYf2ICewfGYwdS9w5MoGLa6OluQ2cujcF8EYP5IgASeYIgiLrhj2tuvrIzEtekdEdKHz84jLd86XG87R+fwG9ejq6TtOdYHgCwdaMr8uNFDAiRbyWRJwiCqAumzbGyKYEHPnEDfu/VG6GrDIblCb9w8i8PTcn78gUjsp3jY0VkEipWNSXRkUvhxHgJA/kiNIVhVXNyUY6dRJ4gCGIWbJtDYQxb1jQDABKairLPySuukg6MFeV9phWNawbzRXTm0mCMoSuXRsW0sXdgHGtbU1CVastyzA8SeYIgiFmwbA7NJ8IJTQkMvApBL/omSZkxmfyJ8aLM3kU8s6NvdNHyeIAalBEEQVRleLKMoYkSLM6h+EVeZYFMPlxiCQCmbUfuG8iX8KqOFgBAh1syWTJsdC5SjTxATp4gCKIq33j0FfzhXb2wbR6I
UyJO3ufaVzYlACCQ2QNO+eTIVFm69o0rssi6ywpe3NmyaOdATp4gCKIKU2UT02UTFgdU5om8ripy5isQLLFc2ZTEyFRF9rsRnBwP1sM3JTU885k3olAxsappcQZdAXLyBEEQValYNkybOwOvgbgmWELpF3lRJRN28rJU0jeztSmpYXVzCowtzqArQCJPEARRFdPisGznn9/JJzQFFWtmkbdCA6+DYmbrItXDV4NEniAIogqmbbv/5uDk3ejFtIJxzQnXyS9WI7JqkMgTBEFUoWJy2NzpJ6/61DI88Gr5qmvaswloCoMRcvKnpytoTmqyY+XZgkSeIAiiCqIMsmLZkYHXak6+LZOAprJIXGNYNvQ6rP9KIk8QBFEF4dbLhh0poawq8tkEdCXo9AEn39cWaVbrTJDIEwRBVEFUyFSsoMjrqhKonvGLfHtWhxrj5E2bQ1fJyRMEQdSVf3jwZTz1ymkA3uBpxbShxFTXcDeLD/abT0BTlEgJpWnb0NSz7+RpMhRBEISPbz52GPmCgWvPW+E5eTPo5MUiIYbFkdAc1375ulZc1tWKje0ZaAqLTIaiuIYgCGIJYHNvwFVm8mY4rnFui1p5i3NsXpnFHe+8DJqqQFNZpAulYdkU1xAEQdQbi3MZv3gibwXjGlesxeCraQXr6HVViZRQmjavS1xDIk8QBOHDtj2RF43HIk5exjWOyNs8GMWoMXGNYdnQFHLyBEEQdcXiXIq76cvkZ3TyoS6VmsKiA68WlzHP2YREniAIwoVzDs69ahmRuVcsO7JoiP/xcCviuMlQpk1OniAIoq6EYxpRQsk5gpOh4pw88zv56GQow6JMniAIoq6IHjSWJQZePTeuxDl50+/kPTnVqzh5qq4hCIKoI2Ks1AxV1wCA34QLsRaPO5m897iqREsoqU6eIAiizkgnH6qTB2Z28hYPO3kFRkx1DTl5giCIOuLP5G3baTMsCHehBHyToWKcfFzvmmWbyTPGvsUYO8UY2+u7r50x9gBj7KD7s20h9kUQBLFY2LbXiybsxOPaGlRMp3+NFcrkY3vXWHxZV9d8G8CbQ/d9GsBDnPMtAB5yfycIgliyiLjGtHlEpOPiGsPy3H6wuqbaZKhl6uQ5548BGA3dfQuAu9zbdwF4x0LsiyAIYrHwO/nw8n2aEhfXWDKW8Ucxcb1rlnVcU4U1nPMTAOD+XB33JMbY7YyxXsZY7/Dw8CIeDkEQxMz4nXwlJPLhVsOAE9cIkVdYuHdNuAvlOTrwyjm/k3PewznvWbVqVb0PhyCIcxjL9qprwk48vgsllxeGSO+aOCe/XOOaKgwxxjoAwP15ahH3RRAEMW9knbzFZxT5pOosxl0xbSnmSugiEOlCaXFoDebk7wNwm3v7NgA/XcR9EQRBzBvLt9JTLXGNYdnVnXx4IW/bXr4Nyhhj9wB4CsCFjLHjjLEPAvg8gJsYYwcB3OT+ThAEsWSx/AOvkRJK77aMa0xbPk9RqveusWyn8Vk9SigXZPk/zvl7qzz0hoXYPkEQxNnA9pdQmqG4xl8iqSpQmCPy4lqgheIav5MXgt9o1TUEQRDLCmuGyVBKaNBUVx23Lpy8/yKgKkog0xe9cJZtXEMQBNEIeG0NotU14cqYhKag7HPy4eob/0VC1Nwv5xmvBEEQyx7bN/Aa7gcfdvJJLeTkQwOvnHuTq8Ts2Xo4+QXJ5AmCIBoBf4OysMj74xjAiWsqpi0vDGrMjFjDtvHg3lOYKhsAUJcSShJ5giAIF9u3aEi4d40a4+TLpi3z9rCTB5yLxkfu3iXvb7TJUARBEMsKy7doSLh3jcLCIq+ibHq9a8ILeQOIVOick20NCIIglgr+6prwZKiIk9cdJy9FPqbf/PBUKfAaKqEkCIKoI16dfLS6JjzwmtJUlAyfk1ejcc3R0ULgNVRdQxAEUUeEYNscUScfjmtmdPLO7f7TQZGnOnmCIIg6IvrQAEDJsAKPhQdNk5qKkuGJfLB3jSOtESdPmTxB
EET94AGRd3vSuNodqZPXlcDAa7gLJQAcC4m8TtU1BEEQ9cOf0BRdJ5/SnbbCYROe0lSUjfgulFoVJx8evD0bkMgTBEG4+JuKlU1H5NOuyEdKKF0nb8Y4eSHm4Uye4hqCIIg6YvvimrIb13hOPq66xpatC8JdKAGgbAYHb2nglSAIoo74nXyxYkFhzsxWIFpdkwo7eRZ18gDQltHlbSqhJAiCqCN+J18yLWiqIgU72qBMhWHx2F7x/pmtnbm0735y8gRBEHUj7OQTPpGPc/IAUChbkcf9Tt4v8pTJEwRB1MjYdGXBt+kX+ZJpQ1OZdOiqGnbyjnxOV0zn8ZhMHgA6W1PyNjUoIwiCqIEdfaPoueNBDOSLC7pdOzQZSlcVmaNHnbwzIFuouE4+poQSCMc15OQJgiBmZTBfhGVzDE+WF3S7/jr5kmFBV5h033ENygBguhx18v7bHYG4hpw8QRDErIjyxkqoRHG+hNsaBAZew05eq+7k/Y69K+fFNTpV1xAEQcxOyZ2oJCYsLRS2f+DVsKD7M/kzdPKrm32ZPDl5giCI2VkoJ18yLOw+OiZ/Dwy8GjZ0VZHNxuLaGgA+Jx/ThRIA0glV3iaRJwiCqAHRITI8o3Su/PS5Afzu157EeMFZg9U/8FqsOAOvepW4Rjp5t7rGP9jqL5UUbREAimsIgiBqQoj7fJ38WMGAzYEpV6j9Tn66YiKtq16dfMxkKMCrk/frt79UMiV730QnVJ0NaCFvgiCWHcLJz1fki27UUna35x945RxIJVQZsUQGXmdy8qF8PqHVz0+TkycIYtkhnPx8B17lxcKtnfQPvAJAWvcy+XCengxl8kEnH5TWtK7WpZc8QCJPEMQCUDYt/OtTfRGRXCz8mfwPeo/h9JRTL7/76BiePTJa83ZEz3gxkBta8S8gznHL/wFedY1f2MXsWDEAm9bVurQ0AEjkCYJYAJ48dBqf/ek+7B0cPyv7E05+MF/Cn//78/jZnkEAwN8/eBB3/HJ/zdsJO3l/XAM4lTHVGpSFZ7z6H05pCnIZHX/7rsvd5yp1aU4GUCZPEMQCUDYXptqlVoQ45wtO/5ppX7YunHUtFEOlmOFvIildRQnREkkg2LtGVRiY73FNVfDcZ387uB3j7Lw3YcjJEwQxbyqWI47GWRJ5cTEZLzqlj0L0LZujMBeRrwQnVUWc/AzVNQlVAWPOAG34AhDG/43gbEMiTxDEvBHiXgmH2ouEEPWwyBs2l65+LtuZycmLrD0c1zDGvAVFZhHwtK7WLa4hkScIYt6IhTMM6+wMvIadfFE6eRuFylzimmDMZEWqa1SvQVmMW6+2NGCYeg68UiZPEMS88UT+LGfyQuQrzn5Ni8OwOCqmXVNtuhfXxA+8phKqrJSJm6xaq5O/5aounJoozXo8iwGJPEEQ80Zm8mdJ5CtVMnmx/0LFREJLzLqdcHuEaJ285+Tj1met1cnffEXnrMeyWFBcQxDEvBHiutCtf6sRztKLvoFXADXn8sXQduIHXuMXDQFqd/L1hESeIIh5IwZez3YmLxCxi9h/scZc3svkxUXCqZoRpBO+BmUxaimd/CzVNfWERJ4giHlTr0xeEHHy5RqdfCVaXePP8lO+AdPYuEarLa6pJ5TJEwQxb852Jh928kL0Tdu5f7oGJ2/bPNLN0uKuyLurCqZ1FTdf2YnmlBboCy/4kxvPw3kvNmF7d9sZn8tiQyJPEMS8kZn8WRB507JhhgZIi1LknfsLNTh5/4XCP/AajGtUdOXSeN81G2O38fqLVuP1F62e2wmcZRY9rmGMvZkxdoAxdogx9unF3h9BEGcfGdeYi5/Jx7VOkE7eEgOvszv5oi/ykXEND8U1WtS9LzcWVeQZYyqArwJ4C4CLAbyXMXbxYu6TIIizz9nM5ONE3ht4FSWUszt5v8h7bQ2cfF1k7HERzXJjsZ381QAOcc4Pc84rAL4H4JZF3idBEGeZinn2MvnwoKtzX3DGai1NyoqVGCdvcyjM
G0hN6STys9EF4Jjv9+PufRLG2O2MsV7GWO/w8PAiHw5BEIvB2czkhZPP+lx2xbIDWX2xBidfCjh57yKhKkxOgEqTyM9KXF1RILTjnN/JOe/hnPesWrVqkQ+HIIjF4GzGNUKcW9J64H5/2WQtk6HiMnmLcyiMycimXk3FFpLFFvnjANb7fl8HYHCR90kQxFmmHgOvLamgyE+UDHm7liZlgbjGt/yfEPi0rgZ6xC9XFlvkdwDYwhjbxBhLALgVwH2LvE+CIM4y4Tr5E+PFOXWDnAuekw9WgE/5cvjpsgXL5ug/PV11O8LJN6c0b/k/7sU1jZDHA4ss8pxzE8BHAdwPYD+Aeznn+xZznwRBnH1MXyZvWjbe+uUn8Hf3v7wo+6rm5P0iX6iY+Len+/G6L/wGLxyPX5JQXCxyGR1ly8vkRVyTTjRGQ4BFnwzFOf8lgF8u9n4Igqgf/kz+pZOTGJ2u4MlXRhZlX+FMviWlYaJkYtIX10xXLBwZcVz8YweHcdm61qrbaU3rKLu3benklYaokQeodw1BEAuAF9dw9PaNAgAODE3KVsALiefkHY/annVaCk+WfE6+bGJNSwoA8OKJidjtiEw+l054C3nbHKp08iTyBEEQAPxdKG309o/JtU93HR1b8H2VZJbuOPm2GJGfrliyE+W+gfi4Rizi7Th5MfDqdJukTJ4giGWPYdn41d6T4DxaETOYL2J3jEA/9cppjE1XYrcFOKWIvX1jeMNFq6EqDDv7nG2MFw08cXBh4hvp5N2B1/aMI/L+TL5YMWUZZd/pAoYnnY5ju4+OYTBfdJ7jH3g1bfx630mUTStQXdMIkMgTxDnKHb/Yjz/+t53Y2R8V87d86XG885+eDNxn2xzv/9YzuPvZo5HnC5EfK1RwcqKEqze145LOFuxwo5s/vGsH3vcvz9Q0E3U2Cu42cmlH3D0n70RDzSkNU2UzUN1zcGgSAPCxe3bjSw8eBABMFA00JTWkdBUjU2Xc/p2d2HN8HApjOH91Ey5c2zzvY10KUBdKgjhHeeDFIQCIdHQEvGX1ChUTmYQmn2dYPHY2qVisY6zgvK4lpWPbxjbc8+xRGJaNHa6jD6+8dCaMFQwkVAW5jBvXuD+n3LimLZPA0EQp0L9G3J4qmxieclz9ifEiOlpTkbVgVYXha+/bNu/jXCqQkyeIc5SBUGzhR/RuGcx7i0+LvjCGHZ3VKgYu8wUnyskkNfRsbEfJsLFv0Bv4tBZg5aix6QrasjqSbpwSzuTbMjrKpo28ezEAgsv8jbpx04nxEjpzabmEn2Apr/J0JpDIE8Q5yGnXzQLxvdfXNCcBQObXgLcgR5xQeyWUzmPZhIoedyENUW3jbGP+Ij9aqKAtk5DiLDL5CSHyrugPTZSwosm57V+we8y9EA3mi+jMpQL94wFAWcKrPJ0JJPIEcY6xd2Acf/Hvz8vf43qvr3bLD/0iLwx8nFAbofa/mYSGNS0prG9Py1we8L4NzIUf7z6Ovb4Kmbwr8iJmaUnrUBgwVXaiojZX9E+GRN60bFg2x+h0BSXDwshUBZ2taSR1cvIEQTQQP+g9hocPnMLmlVkA3kCmn5VNrpMf9+Ia4eTjmpCFF/DOJp0o5bKuVhw4ORnZxlz4m5/vx3ef8QZ7R6craM8mcN6qJly9qR2XdbUipauyukaIfL5gYEXWOY+iYclIabJk4vhYAQDQmUtHnPxSXq/1TCCRJ4hzjImSia5cGr/8+PUA4js2CjPrd/LChYfdOOc80mJYDNZ25dKBC8WZOPmKactFPQBn4LUtq6M1rePeD12L9e0ZpHRVdqEUA7EAsMKNbooVW3aaBCDHCTpyKSRCM1spriEIYlkzWTLQnNKR1BSoCottJCbEOCDy3JvV6icuvsm4s0U7c+mAuJ5JJl+xPIG2bI58oSJzeEFSU6STz2W9x5pSGhKa4jj5GJHvyqURTmcaoLtwABJ5gjjHmCiZaE5pYIwhk1Bjl8oTIn/CH9dYwskHXXtc
fJN1nXxnLh273VrhnMOwbDkBaqJowObe4KrAcfKOyPsvAJmEhrSuomRYgWUD9w06Gf/a1pSsthGQkycIYslx9zNH8Z2n+gL3lQwL7//Ws3jHV/8jUOEyWTJl35dsQoutrhFiPJAvyhmxXgllUKhFD3n/DFHR96UrJPLmHEsoLZuDc29Rj1G3MqYtxsmLi5U/rskmVKR0BcVKUOT3DkxgZVMSSU2NfJOhgVeCIJYcP9k9gB/tHgjcd2y0gMdeHsZzx/J48pXT8v6psiH7vmSSamx1jRD0imnLGnMRtYRLKEUeLwZbE6oiK186WlOx260VEQ2JTF60VAg7+aTvApPzO/mk4+SLhhXI9ceLBrasbgIA/NH1m/H712zENZvbAdDAK0EQS5CKZUdmovoHQ/2RyqQb1wCuk58hrgG8ZfVs19GHK2TEtsVgaybpCW57NhGYbDTX6hox8Uo4eTGjNi6TF6R0RX6rcJy8GsnkAWC7W8efyyTwN++4FCvciiKKawiCWHIYlh1YmBpAQNTEbc45JksmmpKuICfU2H4y/vYDIs4QUUt48NQTeSGsXrcUxlggspmzk3ePW0QtnpMPLhji7xipKYq8iGWSGtKJaCYPANu62wO/i4XBKa4hCGLJYVh2pD2BvwpGuPqi4SyLJ+KabDLeyfuFXDwuBDqcqxsyrvEuHH78g69zra4R51BLJi/QVOb7pqLKgVf/RY8x4KoNucA2xDcRimsIglhy+BuHPfDiELo//QuMTnutC4TAif4uQgTTifhM3rY5dLeWUDh5r4Qy6Igr7sCrEPewyM/LyfuWFQQcJ59Qlcg+Ak5eZd6YQ8KfyTvb0FWGC9c0R5YPFGMKDWbkqQslQTQCFdNGyV344vP/ez8Ap4+6wJCzPb12vIDjdOOqa0zX7Y9OV2QmL0onw0ItnbzI5BNBWfnjG8/DyuYEvvrIK3N28kLcxaIeJcNCSlfAQkoccPL+uCahIpVQUax4Tv5zN1+CV3W0RPYlu20uQBO1pQQ5eYJoAEzbloton3Zza9OKZvKiiVeLz+lWc/KizDKcyUdKKEUm7zrhbDLosjetzOKmi9cCiNbYz0bYyRs2h65GZSulV4lrkiKusVGxnIvVa89bia0b2iLbEJl8eGxjuUMiTxANgMiuS26LXQCYdAdUGfNEcioU12STzmSo8OpQpm3LyMNz8tUmQ4XjmmhAoLk591xdsqjBFwttm5YNLWZKalLzD7wyNCdj4hr320C4f7wg444plMy5XYiWOiTyBNEAiCoUfxmlEPSmhCZzcy+T90TQsqO9Z2zuLa8XzuSrDrwmvAtHGDGYac9x0ZBKyMmbFoemzOLkfXFNNuFU1xQrXoOyaiIvjr9MTp4giKWGEDB/1CB6uWSTmq8Do+Pym3yZPBDtKW/atnTDooGZyNOrl1DGZ/KAz8mf4cCrYXHYNofpGxD2E3byq1uSSGoKMkkVKbd3jXhvwouECMQ3kUZz8jTwShANgBDDU5NeRY1w8tmkKp1+uLpGRBTTFTMwi9S2nYsDY14rYjHT1bTCcU1wxms2Ud3Jn2l1DeBcyEzbhjZDJq8wZzLT7716I67fsgq6qiDlHo8492pOXvSVp0yeIIglhWVzCO08PDwl7xeZfFPIyTPmRDiA517DtfKmbUNTGDK6GnHy4S6UFZnJi7LMOCfvSM2cM3mfyJdNG4bF5bcCP8LJiwtANqnJChox+1WsWxvuHy8QZZgU1xAEsaTwC+Erw9PytufktUB1TVNCk1P3RQ4dnvVq2YCqMmR8k6XsUKMyuX8z5OTjMnn1zJy8GEsAnP41pmXPWF0TdwHwi3xCi5Zfym1oorqmseIaEnmCWOb4B02DTt5xrtmk5quT9/rWANWdvGXbUBlz6uhFCaXM5G0M5os46bYhnk8m3396OtLq148RKgM1bT5jdU2syCc8ka+WxwPehaJkNpaTp0yeIJY5/vVVj4wEnbymMCQ1RTr56bIp2w8AniBHnTyHqjCn
jj40Gcq0OV7z+YcBAH2ff6u8yHS0pqApDOvbgu2FAX8mH3TJr/vCb5BQFbx8x1vizy0S19jQZ6iuic/rHZHPF2YW+VXu4uVvu7yj6nOWIyTyBLHM8Wfkg/kiFOaUQE6VTSQ0p+2vLEG0g3GHGIQM5+xC5J06enfgVSzk7XtuyVe1smllFrs+e1OkXQAwc3VNxbLBOY+NUSJO3uKxA6czOnlX5CeKRqAKJ0xzSsfe//YmZPTqz1mOUFxDEMscvxBOVyysbUm59zuCmFA9J29YwRJEIZhiNqjA4s4ApzMjNuzkvf29MDCOYsX5PaWrsQIPxFfX+Cdg9ftaMPip+JusmTYMm8e69eRMmbwb1+TdTH4mmpIatRomCGJpEZ7ItMa3UIeuRp28XySF4Btm1MkrwsmXQ5m8xWWuv6NvFEXDQsJdL7YasromprslAPT2j8W+zh9FlU2nbYNeQ3WNn8DAa5XKmkbm3Dtjgmgwwl0hVzUlIXQwoSrQVUWKZbgEUTjbcmgblu05+UirYZvLqpydfWMoGVZg6b844py8qFsHEFie0I//W4OIa+IGXr1MPu4xVe47qZ97knfunTFBNBhhF76iKSHFO5LJh0oQk6rqbsMTU86dunvFra6Zlpm8tx/Ru/7gqSkUK7OLfFzvGjH7FgBe8VUFBc7NCpZQGlUmQ82UyfuricjJEwSx7AjHNblMQopeQnUyecPi4JxHShB1jUW2IcRcU9w6+VCDMsCrxilUTBQNS+be1VAUBsaC1TUTPidfqTJJKry6lWnx2LjGq5OPStrKpqQUf3LyBEEsO8JtBtozUScPOEJuhBp8CWfrF1ORmyuK4+Sd19mBPF3cni5bKBrWjKWJAk1hgW0IJ78im4isvyqItDWwZnHyMXGNqjCsdccpyMkTBLHsCJc/tmUTUsx0lQWE3IlrPCFUXYftF1Mxs1Vk8oAzSBo3W7VoWChUzFmdvNhXXCbfnk2gUmUCUqBO3nCra2Z08vGDv52tTu3+TCWUjQqJPEEsM0qGhc/dtw8TrhMOD7y2Z3UZSyQ0xaugsURc4/3ZM8YCJZaAF8s4k6HEjFgzIvJtGadccnTamDWTB5woxe/kRZfM9mxCXqgG8kX87S/3w47pk1OWTn5u1TUA0JlznXwN3zgajXPvjAlimfPcsTy+/WSfrEgRebqoYMll/E5eQcIVwIqcMRoUSf/ALBAU+bSv7UF4IpNYTPv0VLkmkY86eecitbIpKS8yD+w7iW88dhjHx4ry3GQFkOEcQ1zurqsMCpvBybvrzMZdIBodEnmCWGaImEO4XOHkxXJ97ZkEkq7oJn2ZvGHFlyDO5OSFQy4bdqQlgWhNPDpdke18Z8LJ5L1tTJZMMAbkMro8h1F3VSvRP8YwbTS5bRgq7vHH9ZNnzDnWak6+wxX5iaIR+3gjMy+RZ4y9mzG2jzFmM8Z6Qo/9JWPsEGPsAGPsTfM7TIIgBJOhmEaKfNqJT9qyCSTVaFxTNuP7sSe0eJFXGAs07QqlQjKuMW1+hk7eRFNSQ1JT5f7H3GZlYoUrw7JlV8uyUb2fPODk8tWcfJcb14xMVW+G1qjMt3fNXgDvAvAN/52MsYsB3ArgEgCdAB5kjF3AOW+s9m4EUQc8J++KvFsn35LSoSoMLSlNZvK6qsjKl4rbjz0urvHn+pZv4HUmJ5/LeIuM1Cry/jr5iZKBlpQOXWNyMtZowRV5Q4g8R1JToatMVgfFlVACTi4/W1xzeroc+3gjMy8nzznfzzk/EPPQLQC+xzkvc86PADgE4Or57IsgCAfp5F1xF3l6a1pHW0aXg6mAN+MVEHFN1AnrajCTF0KsKEEnH83kvT41Z1pd05zSkFSdiwznHPmQyFfcyVsJVZHuvpqTTyfU2F7zANDhVtcoVXrJNzKL1YWyC8DTvt+Pu/dFYIzdDuB2ANiwYcMiHQ5BNA5ixadKKK754HWbkC86Ilm1Tj6mBNHJ5D3xtas4eTsk8n4nn6qpuiZaJ9+c
0qCrCjh3Yp/RaTeT98U1CZUhqatyAla1wdP//NsXynbBYVrTOv767RfjdResmvU4G41ZRZ4x9iCAtTEPfYZz/tNqL4u5L3ZKG+f8TgB3AkBPT8/clo0hiHOQSFzj/rx6U7vsFS8iGt3v5E3b6UkTEkk9VF1j+gZehZMvxzj59uzc4xqLB0soVzUlvYuQaXuZvOGJvHDyor1CXD95AHjrLH3g/+C1m2Y9xkZkVpHnnL/xDLZ7HMB63+/rAAyewXYIgggRFXlHOOP6xPura8pC5EMimVSVwGQk219do/sz+RnimhraBWiKIhcDF+exeWVTQORFJi+W4DNM7owr6IpcvORcLIOcD4tVQnkfgFsZY0nG2CYAWwA8u0j7IohzCq+6xs3k3coUf2mhiFmECwa81r7hEkRn4DXaskBlTH4jiM/kfU6+xkzejMnkxcVpvGjIcwlk8m5PfLF4SbVMnohnviWU72SMHQdwLYBfMMbuBwDO+T4A9wJ4EcCvAHyEKmsIYmEQTt5bCMRpVeBfWSkuk68mkrrKqtbJp/TqmXxLWpctjWvK5FUmK3Q4524mr8vjG5ooyeeWjHAmr2DKdfLVqmuIeOY18Mo5/zGAH1d57A4Ad8xn+wRBRImrkw9HMAGRDzn5yMBruIQyMBnKdfJG1MknNAXZhIbJslmTyPud/Oh0BYbFsaYlKY/vpE/k/XXyuqogpanycXLyc4PeLYJYZsRl8uEIJjDw6t6WA5dxJZRmtE5eVRh01VnxSeT54X1k3IlKtfWu8UooB/OOYHfm0rFOXsQ1zgxXBemEKs87bsYrUR0SeYJYZsS1NQg33opz8l6dedTJl6vENYAj5k51jR15nVghaq6Z/EDe6U3TlUvL4xua8CYqhevkU7oqG5rF9a4hqkPvFkEscabKJsbdni62zaXY+evkw+7cWzSEScGflpl2+LleXDNeMDDlXkSEyKd0FSXDhmUH+7EnNVWKe61dKMUF5MS4I/IdrSn5TUM4+aakFqyT1xjSuuotZkJOfk4s1mQogiAWiM/8+AWcGC/h3g9di6mKt5qSf93WsMjHOnkjfjKRf8brB+/agVG3Vl1lQSdv2TaSvpr6pM/JzzWTH8wXkdQUtPt635+aKIMxYHVLMtDWQFcVpL1qTYpr5giJPEEscV4emsJJ1/n6F78W7rsSWggE8DL5hKpGnHykQZlvoe+hyZKMTcTFQDh5010Ie7LsvU5m8jV2oRTVNYP5ErpyaacFg7sE4VihgoyuoimpeSJvOoPKSoL5tkMBxFwgkSeIJc5gvojxooFCxQwsfi0zeTMurvFWhlIVp9d6Qc4Yrd5PvmzYchBWCTl5m3Nf/T1zlwd0M/k5NigbHC/KpmEJdzHxfNFAJqkhpaty/MCpk2dgvmSZ4pq5QZdEgljCTJdNjLs90AfzpYCT92fykYFXX6th8bNQpcGX7lvo29/eQDjmpHDyFpfdLYXYZ+aSyav+6poiOlqDqzXlCxVkEyrSuoqSbw5AQlUC2ycnPzfo3SKIJYwYoBS3hZMX67Lu7B/F8FQ56uTF8n++FaKmZ6iuAZyLRtnwRF5oqZfJe05evCbcK2cmVHfgtWLaODVZlk7evzxhJqE5Iu+uKWtzuNU15OTPFBJ5gljCDOS92vHBfFE6+da0s5rS737tKewdmIhk8hvaM0ioCrravAWsRRfHcHWNf6HvOCcfyOS14MXjvFVZdK/IQKlhFqroQjk8VQbnwNrW6Lqr2aSKlK6gaFiy93tTUgtk/tUalBHxUCZPEEuYwbzn5AfyJax2W+m2ZxNyIBWITnA6f3UzXr7jLfL3dEKRpZHVnHzRsAITntSAk3cGd0U8I74pvO+ajfj9a7trOhfRT150mlzhdrH0l2VmEo6gFw0Lu/rHAABXbsjh0Kkp+Rxy8nODLokEsYQ5kS9CYcDKpkTAya/IJjDhG4T1T2aKI+2bTBTXoAxA4KIBOPEK4Dj5smE5mbysv3ceY3NYhEOs8SpK
NEWr4qiTd+Ka3r4xJDUFl3a2BjJ5KqGcGyTyBLGEGciXsLYlhfXtGZnJawpDc0rHRNEbhN03MD7jdmaaMSq+BUz5BnWBcJ2809ZAtjCuobVwGOnk3XbCYtERv8iLTL5oWNjRP4Yr1ueQ0GjgdT7Qu0UQS5jBfBEduTQ6c2lZXdOc0pBQlUA5pRhUrYZ/spIaU0IJAJNlI3C/qnoiXzIsWJwjoSpQWDBiqRWRyY+FnLweiGuc6hrT5tg7MI6ejW0AgnX4FNfMDRJ5gliicM5x8NQkuldksbYlhaGJkmzPq4f6zfz12y+ecVvBuCM88OqI5mQVJ5/SVenkVYVBUxUZ28wF1V00ZLRggDFn8BhwxF+kPiKTB5weOj3dbfIYqh0/MTM08EoQS5S+0wWMTFWwbWMbxgoVFCoWTk9X0JTUArn0t/9gO268cPWM2wrEHVUy+UhcowSdvGnbjsgrLFKXXwuqAunkc2ldbp8xJjthZhNqQNC3bWiPHj/1k58TdEkkiCVKb98oAGB7d5tchan/dEHGNYJaJiLNVIIoZpyKzF7gb1BmuvXtQuRrqYsPI+rkRwsVtPnWhwWcJQgBIJPU5PlcsKYJre4Sg8G4hmRrLtC7RRBLlJ39Y2hN6zhvVRPas47YDeSLTlzjF/ka+sakZnDy4ltBNZEXgl4oW9DcuOZMnLyorhmbrgSWDgR8k6t8Tr6nu10+TtU1Zw6JPEGcAY8fHMZ7vv4U/vSe3ZHFNADg3t5j+Nen+ua1j97+MfRsbIOiMCmKls3R4lsXFajRydcQ10QyeZ+TB5xFR5R5xTUMNndWhQqLvO538gnnthh0jRw/VdfMCXq3COIM+NmeQTzbN4r79gzKmZl+/umRQ/j2k33z2sex0QLOX90EwKtEAeAsfq15Ql1Lm18hnEA0rpEllKHqGi3k5G3u3PdH12/GLVd2zeVUAtsbmarIbyYCv5PfuqEN/2nbOrzhVWvk4yn/8ZOTnxM08EoQZ8Cgr91AoWwBzd5jw5Nl9J0uIKUr4JzPacKQoGLaKJs2mlPOn2gu4xd5Hf5N1iTyMzj5ZJWBV4UFnTzguPE/umFzjWcRRJRkjkyVI5m8EO5MQkMuk8DfvfuKwOOidJMxdkbv57kMOXmCOAMG80UpTNOVoDju7HcGTEuGjbGCEXltLYga+OaU43hzGc/5Nofjmjlm8tUWGAln8mEn77/vTPC/tj2SyQe7WoZhjCGlq1RZcwaQyBPEHOGcY3C8iPNWOVFKITQRaUffmLzt7z0zF0Q+Lpy8ripocW83hUQ+VUM+HqhOUcIDr/GZvKJEnXwtjciqofpioqoDr8nqF6y0rlKN/BlAcQ1BzJGxgoGSYWPLmma8dHJSdncU7Do6hpaUhomSiW8/2YcjI9P43u3XzChQT74ygo98d5fs2f7nb7oQgNOBUdCeTWCiZKI5pctFNRKqUlNJYbqWGa8+kfdfCBbDyfu/mQDehKxMorokpXQVFo8OchMzQ5dFgpgjwp1vcQdFiyEnfyJfwtWbnPK/H+46jp39Y9h/YmLGbT57ZBRjBQNvvbwDI1MVPH3YiXxEXANA5tj+uCZVYw8ZIfK6Gs20xepOebenDBB07CuakvK2Oo/KFv/F5YI1zYHHvIHX6iKfTqhUWXMG0DtGEHNkwBV5Ufni7xvDuTPZ57xVTUhqCoTx7PVFOHGcyJewqjmJz7rtCfpPTwPw4hrAizj8JZS15PGAF7nEiWRKV6ApDCPTnsj7XXdnLhV7/1zxv3bjikzgsVrOJ02Z/BlBIk8Qc+REyMkXfAOvhYqFimmjPZuQKx8BzsSmmRBrnqZ1FUlNwdHRAgCgxe/kM8LJ63LQt5YaecAn8jHlh4wxNKc0ubYr4PWtEfsTkU046pkL4rWXdrVEvk0kVMX5N8P4QlpXqTnZGUAiT5zzGJYNO2ZCUzUGx0tIagrWtTlu1N+HXfRKb8sk5BqmF65pxo6+UXDOUTbju0UO5IvoyqXAGEN7NoGR
KWc7ficvasubU5oUw1rKJwHPIVcbF2hKBWMSNSSma1qcc5mPyJ9235ur1rdFHktoCjIzDLoCQCpBA69nAr1jxDkN5xw3fuE3uGsOs1NPjJfQ0ZpCSndqt/1OXvRKb8smsHFFBi0pDe+9ej1OTZbx0slJXPnfHsADLw5FjmEwX0Rnq+P8/ZUnfvFd05KCpjC0+Noa1BrXpGVcEy/SzcngQKjKwiKfnPH1tSBe+/qLVkX3n9IjFTdhcmk9MBBN1Aa9Y8Q5Tdm0MZAv4oVZFt3wky9UkMskwBhDJqHFOvn2rI5P3HQBfv+abrkY9+6jeRQNCw+/dAo3XbzGtz2nWqfDjXfaXMee0pWAc33v1RuwbWMbsknfwGuNLX+9gdd4Xye+MWQSKgoVK+LYVy+Ak3//td24pLMV1563IvLYJ964Bfli94yv/8vfuShSrkrMDok8cU4jygbnUs8uFu4AhCjGOPlMAqubU1jdnELJjWheGXbWKRXdJQViILfLHeD0Z+9+skkNV21wog6Zydc68Oq2BaiWaYt9ZRKa7DbpZ02zc2yVWZYZnImEpsQKPOBcRMSFpBodrekZHyfiobiGOKcRM0tPjJdmeWbwNWJANJvUAu5ybNrZnr/XjJjdKUT+4KmpQLmiuMCIgdp2X6lkNUSr4VoHXsXzqjlxMdEqqSlIakrkeWtbnbjm1GS0Tw+xtGkIkZ8sGbh/38nAH87Z4OWhSZhWbc7m2GghsFzbcmI5H/tsCCd/Il+qOvi6s38Mjxw4Jevhp8ozO3mFhapiXNE+PDwt79t11Km2GZ2u4PGDIwA8p1rNyfvR5zjwKp4Xbk4maPKJfEpXo3GN6+SHJmq/GBJLg4YQ+YOnpvCh7+zEU6+cPmv7PDlewpv/4TH87PnBmp7/7q8/hS8/dHCRj2rhMS0bN3/lCXzl4UP1PpRFQYh8xbIxEtNN8ujpAn73a0/iD/7XDtk6eLJkygHAbEwmn8skApOJWlIaVIXh2FgBjAEKA5475owB/L8/fxHfebofuYyOFe7FQDj5lhmcvDfwWtufsK4q0FU2Q1zj7CtRxclfsT4HINj+l1geNITIX9rZiqSmBHqGLDb9p6dh86A7q4Zp2Tg5UarpuUuNl05OYqxgyNy40fC31z2Rj7rUvtPTvtsFmJaNQsXyMuxk1Mm3habsM8bQltHBubOu6ZqWlIxo+k5P44r1Odz/ZzfIC4OY8j9TJclc6+QBx81Xa4EgzichnHyoumbTyix6/+qNeP+13TXvj1gaNITIJzQFV6zPye5/Z4NBt2KiFvHLF42an7vUEIOE+TPsprjUmfD1a4kbfBX3rcgmMJgvyk6NwvlmE1pgxuvodCWQxwtEBNPu1s+L7Q7mS9iyuknWoQOLk8mL5+rVSih9cU0ixskDwMqm5LwalBH1oSFEHnC+Ru4bnAi4qsVE9BOvpSpjzC2rO9OOhPWkt9/LjhsRf1OuuIvwYL4IhQFXbchhMF+MdIfMJFQUfA3KxqaN2Hpvkcu3uTNhB/NFGJaNoclSYGYsUGMmL0ooa6yuAdzeL7NU1ySqZPLE8qVhRH57dztMm+O5Y3kAwIuDE9g7h9pnwCkP+8nuAXBfp7vevlEcHJqMPNfvxB59eRhffeSQ3HcYIZATJRO7jo7JQTcAsG2OH+8+PmNpmmnZ+NGu47HLzNXCsdECnnxlBMWKhfv2DAbO75nDp9E3Eh8jcc5lz5WxBR7UPnRqsuo3r98cOFXzt54Xjo9j3+DMn/PTh0/jq48cwjOHvTEb8b6LC3BKVzCYL+HIyDSe9j1vcLyE1c0prG/PYDBfxESoz3s2qWGqbOJHu46jbFoYK8Q7eVFh05ZJoCuXxuB4CSfHS+Ac6GwNlg7W4uTlwGuNdfLiubPVySc1p60C9YhpHBpG5MXA0IuDTre/z923D//1xy/MaRsP7h/Cn33/Oex2xZpzjg9/dxc++9N9kecKkT85XsJH796FL9x/AH/z8xdjt+sXyNv/dSf+
4t+fl78/dnAYn/j+Hjxy4FTV43r80Ag+ee8ePLh/qOpzZuLLDx3En/zbLvzs+UH86T27cfDUlHzso/fsxt8/+HKV4zZwcqKElK5gdLoSuDjMl7++bx8+de+eyP3TZRN/eFcv/uGB+GMK88l7n8Pn7ot+PoHnfP85fOH+A/jMT/bK+x513/f79gwik1DRvSKLQ8NTuOMX+/Gxe3bL5w3mi+jMpdCVS2O6YmFgzPncxaBoOqFiomTik/fuwfeePYaRqXJsvbeY4NSW0dHRmkLFtKUJCTv5lU1JbF6VxSWdrVXPqT2TwIb2DC5a21z1OWEuX9eKiztaYh9rdvP/hKrgsq5WvKrK84jlR8NMhmrL6MgkVBmjHB8roDTHiRvHxwruzyK2bmjD0dECTk2WMVEyYFh2wAWJuuqKZaNi2dBVJgUgzOi0l2ePTJUxVTbksnA73Mz7eJXX+h/r7RvFmy5ZO6dzEq8fLxp46cSkPM8L1jSjZFgYnixX3be4kL2qo0XO1pyp33etGJaNXf15mLbTM8af8z53LA/T5jImmomx6QoOnpqSPWLiGMgXMThekp+PfN+POO/70dEC1rQkcdWGNvx8zyA0lbn94i2kdBUnxku4pLNFlje+7H6ra5KZvOekv/UfR2BzYOuGXOQ4ZCbva1wmCgXCIp/QFDz8qRtnPPd0QsVjf/H6GZ8T5guhJfX8+OOav3rbxXPaLrG0mZeTZ4x9gTH2EmPsecbYjxljOd9jf8kYO8QYO8AYe9O8j3T2Y0FnLo0T40WYlo2hyTJGpysoGbVPgxYXCNFlUEQVJcOW3xAEA/ki1rV5f5xvv7wTpyZLMGLq5sNRR8mw5UCm2MeJGeIJeTw1CF/s691B4p1uTCTO8+R48HzDCJG/1HWUZ7qUXZj9JyZQNCwYFo+ULYr348jINEamZp54Izo7Dk2Uqs5XEAPHb7+8E0XDwrg7CO5/L5tTOrZ3t2GybMpzdKIU7jYOS8t2uy+dnJSvAYKLXPSfdkokt8aUGbaHMnnnGJxj87fyrRf+gVeisZjvJ/oAgEs555cDeBnAXwIAY+xiALcCuATAmwH8E2Os9vDwDBFVC6cmyzK/nstgp5ezC1Edlf/pd/imok+WDEyWTFkzvKYlie2b2mHz+MkiY9OVSO3xQL6Iimljz/G8s8/x6scpjmfvwPicLlqAkz0PumK+z40Hwud5sopIiscv6WyR57EQBJfHC75fvf2jciGM2Xqw73BF0ubAUJWZmL19Y8gmVLz+otUAfO+7b/ykOaWhZ2N74HWD+SJOT1dQMW10tDpxDQAckCLvimJo0Y4L1zQHJkIJ/NU1QuSfPz6OXEZfkG9H88VfJ080FvP6RDnnv+aci9KCpwGsc2/fAuB7nPMy5/wIgEMArp7PvmqhK5fGQL4UEHYRq0yWDNzxixcjixUDwPPH8/jmY4el0B4fK+KOX7yI+/cN4drzVmBdW1q6xsF8EX/+AydT39btCEPPxnYpAg+8OIRvPPpKYPujhQpWNiWxpjkpB7QG80XsGxxHybChKQwDPrH7+fOD+Ojdu/CtJ444zx0vQVMYDItjz7E8/vnxw/jo3bvw0bt34bvP9ANwBPi//3I/ChUTd/ziRXzsnt3Y2T8mhQoAzNCFTwxu2hz40e4BfH/H0cBxD46XkNAUnOf2TR+drmDX0TH8r/84Ip9j2Rx/d/8BHBst4DtP9eGjd+/CT3YPyMcn3Pfdv0Tezv7RwPsw6jv23UfzuPmKTiQ0JTIwO112zm2iZOCLvz6An+wekNsR30bGC4bcFuA49q0b27Ch3WkL/OjLw/jwd3ehbNrytc0pHevb01jt+3yePjKKT//QGdPpzKWxsikJXWVyPEOIoqitf43bk2V7d/BiIRCZfC6joy2jywtZ5xLpx5JNaGDMGXglGouFtBAfAPB993YXHNEXHHfvi8AYux3A7QCwYcOGeR1AZy6NkakyjviqRYSQ3b9vCN98/Agu7mzBO69aF3jd
Nx47jF88f0LWHD91+DQeeukUunJpvHvbejy4fwiPHxwB5xw/eW4Av9p3Epd2teC3LlqN3is78c6ruuRX7i/cfwCFioV3bu2SU8HHpitoy+p4w0VrYHOOf3z4EAbzRbkwxPVbVmKvLw76n79+GUdGpvHrfUP4P1+9AYP5Iq7bshK/OTCMxw+O4OuPvoLWtA6bczx6YBi3bt+A+/YM4s7HDiOb0PDNxx0Rtm2O22/YHHmfvPJP78Lyufv2gXPgXVvXybEHEVWIqGGsUMF3nu7HQ/uH8J+2rUNzSscLA+P4yiOHYFg2/vWpfhQNC7uP5vGOq5yP+1d7T+Kbjx/BpV2tuOXKLnDOsaNvDNdvWYlHDgxjMF/Efc8NyGOfKpt4zXkrcWRkOjK57YEXnc+wKanjyw8fwpqWJD5w3Sbc+dhhDOSL6AHw8xec9+HK9Tlct2UlXjo5gY+/YQs63M/nKw8fQsW0cdWGHFY3J3H/viE0pzQwxvCB6zahULHw5YcO4p8fP4yiYeGyrlZcuT4HRWHoXpHFwVNT7qxQ5//K/7F9PZ47lsffvfsK/NVPXsA7t8b+N8cV63K4fstKXLkhB8YY3rV1HZ4+fBpvu6Ij9vlnG0VhuHX7ely/ZWW9D4VYYGZ18oyxBxlje2P+3eJ7zmcAmAC+K+6K2VRsaQbn/E7OeQ/nvGfVqmif6bkgBuB2Hc3L+4RrFa4wHAE4ZYLOY0U3ChENp753+zV46+Ud2LaxDSNTZRwdLWBn3xg2r8ri5x+7Hl25NL5061W48cLVcmBOvHanbz+jBad2+hM3XYBP3nQBEpqCE+Ml9PaNYUN7Blesz2F4soyyacmL1LaNbahYTqxwcryEiztacP7qJtz97FGYNscX3n05Pvv2izFZNvHy0KTMmEVf9J6NbejtH42f4ON+Yznhi4gKFQtFwwqsRXoiX0RHa0qW/52eqmBn/xhs7rTNdd5P5737wc7jKBoWtm1scwY7xfvuvg/ifT82WsTwZBm/9ao1yLoD5ZFj727Dto3t2Dc4Hlg/VWTY4nn/ctt2fPwNW9zPuRTZ3+6jeXDufNNamU0ioSooVCxs727Hjz/8Wly01omhRKXMH7/uPHzypguwsimJQsXCRWtb8LOPXSerZba58Zx/Jur69gz+7Q9fjbWtKfzzbduxdUP8tP8VTUl854Ovlhf+//7Oy/Dwp27Eh288P/b59eBv33W5jLWIxmFWkeecv5FzfmnMv58CAGPsNgBvA/B73KuxOw5gvW8z6wDU1uRlHojIpLdvFC0pDaubk/Lr9I6Q2AiOjxUxNOHluWLdzjUtSTmwKr6CP3tkFL39Y9i+MfqVPJvUAivQ+13o2HRFZrKMMXS2pnA8X0Rv/yh6uttkRjs0XpbH9yHXgf/vvSdh2hyduTS2d7fJmvttG9pljtzbNyrFdnS6grUtKdxyZSeGJsp4xq0iEed1/uomnBwvwbKdQUVxf9xxD+adiTotaR2MOU21xP7F/sTxivvFcQvhFrm5GNMQP7d3t6Ejl8ZAvhDYxtoWJ//e3t3mxFPumEV4X9mEiovWNiOb1NCa1uUFS+yvt995T1SF4coNjhMXbn57tyPE4v9LuH1AV+h5gh73/0GjTgwjGpP5Vte8GcB/AXAz57zge+g+ALcyxpKMsU0AtgB4dj77qgUhlgdPTaEzl3ZmFo4XMTZdwaFTU2jL6Hj51CTGfVUiImsX/UbEH3bPxna5DuWW1U1oSWn4/o5jGC8a2NYd79a8lX30QJ4cniDTmUvj6VdOY2SqEsjzB/JF7OwfRUJT8LoLV2Hzyix+tse5Nnbl0tjmivoFa5rQmtGxri2NNS1J3LdnECfGS/IctnW3SUH62Z5BpHVV1kdv726DaXMMT5YxmC9iy+omtKa9Gm5x3P7ZmKrCkEvrckWjtoyO3v4x51tQ/5jcb1cujd+6aDUyCRU7+0ZxeqqMw8PTaMvoODA0iYmSgd7+MTSnNFywuhmduTR6+8ZwciJ4
7Iwx6ZrF5zNeNHBgaFI+76oNbbIPi5hBOjRRwrHRItoyOvYNTuCxgyN4VUezFHHx+YixFPH/JTyzVNy/LVQlQ825iOXIfIfSvwKgGcADjLHnGGNfBwDO+T4A9wJ4EcCvAHyEc77oS7qs9dVLd7plb88cGcXNX30CAHDba7rBOfC2rzyOm774KG764qP4f37+IpqSGt611cnphZD6/8AVhWHrxjbpTqv9sXfmnCXh3tOzHi8MjMt9TJbMgMvvaE3L9S57uttkzPTJe5/D3c8cxeVdrUhqKrZtbJPP68il5H7FMTLG0LOxXbrv217TLY/vgjXNaE5qOD1dQUcu5RMu57Xv+cZT6DtdQEdrGh2tKWxckcHrLliFB/efwk1ffBRv+vvHArMx27IJlE0buYyOm6/oxLNHRvHGLz6KkamybFrV0+0I75Xrc/jBzuN4xz/9R/B9//IT+MnuAWzd0AZFcb7RiPOT23DPMZdJ4PzVTfj6o6/gpi8+irf/4xPg3DtH/+fT2ZrCE4dG8Ltfe1Luz7KdQWp/1UxHLgXmtigQvwPRmaUieusJDaJuXJGJ/dwJYikzr4FXznnVQJFzfgeAO+az/bmS0lV84o0X4MDQBN7Ts142cQKA112wCrffsBnDk+VA3foWANduXoEbLliFtK7ibZd34MDJCdxyZWdg2x+64TxkExrWtaWxaWU2dv8feO0mvOFVa3Dt5hU4MV6CaTtVLRd1tOCtl3kDbO+9ej1KhoWO1hTOX+XEJf/Xa7pxatKJlt7d4yRdt72mGwXDQnsmgfNXNUFVGD7xxgvw5ku9CVEfuG4TAGB1SxK337AZUyUTN1/RCVVh+C9vuQhPvjKC11+4GldtyKE5peFNl6zBM4fXYbpi4vJ1rXjX1i70dLeBAejIpWHYXM5svWJ9Tma0f3zDefjNy6dw/ZZV2LaxDaMFA5Zt44p1Obz/2o1I6SquO98ZtPuTG8/DPc86lTo3XrAat9+wGacmy8gXKmBg+L1rnAH292xfj8myidXNSXzodZsxVXaOXfDJmy7Az32tnK/bshK337AZRcPCe7Z7aeD7X9MtSxnfcJGzv6GJEqbKFt57tTeY/75rNuLSzlZZ4rhpRRZ/+lvn47dDE8ze3bMOK5oS8huWgDGGL916Ja0zSiwr2EJOVZ8vPT09vLe3t96HQRAEsaxgjO3knPfEPUYzHwiCIBoYEnmCIIgGhkSeIAiigSGRJwiCaGBI5AmCIBoYEnmCIIgGhkSeIAiigSGRJwiCaGCW1GQoxtgwgP55bGIlgJEFOpylRiOfG0Dnt9yh86svGznnsW18l5TIzxfGWG+1WV/LnUY+N4DOb7lD57d0obiGIAiigSGRJwiCaGAaTeTvrPcBLCKNfG4And9yh85vidJQmTxBEAQRpNGcPEEQBOGDRJ4gCKKBaQiRZ4y9mTF2gDF2iDH26Xofz0LAGOtjjL3gLqvY697Xzhh7gDF20P25bBYdZYx9izF2ijG213df1fNhjP2l+3keYIy9qT5HXTtVzu9zjLEB9zN8jjH2O77Hltv5rWeMPcIY288Y28cY+7h7/7L/DGc4t8b4/Djny/ofABXAKwA2A0gA2APg4nof1wKcVx+AlaH7/geAT7u3Pw3g/6v3cc7hfG4AsBXA3tnOB8DF7ueYBLDJ/XzVep/DGZzf5wD855jnLsfz6wCw1b3dDOBl9zyW/Wc4w7k1xOfXCE7+agCHOOeHOecVAN8DcEudj2mxuAXAXe7tuwC8o36HMjc4548BGA3dXe18bgHwPc55mXN+BMAhOJ/zkqXK+VVjOZ7fCc75Lvf2JID9ALrQAJ/hDOdWjWVzbkBjxDVdAI75fj+OmT+g5QIH8GvG2E7G2O3ufWs45ycA5z8mgNV1O7qFodr5NNJn+lHG2PNunCOijGV9foyxbgBXAXgGDfYZhs4NaIDPrxFEnsXc1wh1oa/lnG8F8BYAH2GM3VDvAzqLNMpn+jUA5wG4EsAJAP/T
vX/Znh9jrAnADwH8Ged8Yqanxty3pM8x5twa4vNrBJE/DmC97/d1AAbrdCwLBud80P15CsCP4XwdHGKMdQCA+/NU/Y5wQah2Pg3xmXLOhzjnFufcBvBNeF/pl+X5McZ0OCL4Xc75j9y7G+IzjDu3Rvn8GkHkdwDYwhjbxBhLALgVwH11PqZ5wRjLMsaaxW0Avw1gL5zzus192m0AflqfI1wwqp3PfQBuZYwlGWObAGwB8Gwdjm9eCPFzeSeczxBYhufHGGMA/gXAfs75F30PLfvPsNq5NcznV++R34X4B+B34IyIvwLgM/U+ngU4n81wRu/3ANgnzgnACgAPATjo/myv97HO4ZzugfOV14DjhD440/kA+Iz7eR4A8JZ6H/8Znt93ALwA4Hk4wtCxjM/vOjiRxPMAnnP//U4jfIYznFtDfH7U1oAgCKKBaYS4hiAIgqgCiTxBEEQDQyJPEATRwJDIEwRBNDAk8gRBEA0MiTxBEEQDQyJPEATRwPz/uQXnIZ6DOtQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    \"\"\"Roll out a single episode and return (episode_reward, elapsed_steps).\n",
    "\n",
    "    Args:\n",
    "        env: environment with the classic gym API (step returns a 4-tuple).\n",
    "        agent: object exposing reset(mode=...), step(obs, reward, done), close().\n",
    "        max_episode_steps: optional step cap; a falsy value means no cap.\n",
    "        mode: forwarded to agent.reset(); e.g. 'train' -- presumably switches\n",
    "            the agent into learning mode (confirm against the agent class).\n",
    "        render: if True, render the environment on every iteration.\n",
    "    \"\"\"\n",
    "    observation = env.reset()\n",
    "    reward, done = 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    total_reward, step_count = 0., 0\n",
    "    while True:\n",
    "        # The agent observes the full transition (obs, reward, done) before acting;\n",
    "        # on the terminal transition it still gets the final reward via this call.\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        total_reward += reward\n",
    "        step_count += 1\n",
    "        # Truthiness check is deliberate: None (and 0) disables the cap.\n",
    "        if max_episode_steps and step_count >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return total_reward, step_count\n",
    "\n",
    "\n",
    "# ---- Training: run episodes until the stopping criterion is met ----\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    # Early stop: mean reward over the last 5 episodes exceeds 16\n",
    "    # (near-optimal for Pong, whose episode score tops out at 21 --\n",
    "    # assumption based on the env name; confirm against the env spec).\n",
    "    if np.mean(episode_rewards[-5:]) > 16.:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "# ---- Evaluation: 100 episodes with the default mode=None,\n",
    "# presumably disabling learning in the agent -- confirm in agent.reset ----\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
